-
Notifications
You must be signed in to change notification settings - Fork 326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
expfmt: Add UTF-8 syntax support in text_parse.go #670
Merged
beorn7
merged 17 commits into
prometheus:main
from
fedetorres93:ftorres/text-parse-utf-8
Aug 21, 2024
+381
−22
Merged
Changes from 8 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
9b8c348
Update expfmt/text_parse.go to support the new UTF-8 syntax
fedetorres93 f188861
Fix breaking unit tests
fedetorres93 dcfa945
Update unit test
fedetorres93 2fa0278
Fix startLabelName stateFn
fedetorres93 d2d947d
Updated unit tests
fedetorres93 db7d143
Tidy up code
fedetorres93 5b9fece
Fix format
fedetorres93 076fba3
Fix imports
fedetorres93 ad09b93
Remove unnecessary else statement
fedetorres93 2e984ce
Change currentLabelPairs field name and add descriptive comment
fedetorres93 58dd08f
More explicit test descriptions
fedetorres93 52f4fb0
Fix quoted metric and label names reading
fedetorres93 91f4f80
Add additional text parse test case
fedetorres93 6962341
Fix multiple metric names in label set error
fedetorres93 d1f07cd
Merge remote-tracking branch 'upstream/main' into ftorres/text-parse-…
fedetorres93 33e2fcc
Add additional text parse error test case
fedetorres93 e76d9f5
Added more test cases
fedetorres93 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,9 +22,9 @@ import ( | |
"math" | ||
"strconv" | ||
"strings" | ||
"unicode/utf8" | ||
|
||
dto "github.com/prometheus/client_model/go" | ||
|
||
"google.golang.org/protobuf/proto" | ||
|
||
"github.com/prometheus/common/model" | ||
|
@@ -60,6 +60,7 @@ type TextParser struct { | |
currentMF *dto.MetricFamily | ||
currentMetric *dto.Metric | ||
currentLabelPair *dto.LabelPair | ||
currentLabel []*dto.LabelPair | ||
|
||
// The remaining member variables are only used for summaries/histograms. | ||
currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le' | ||
|
@@ -74,6 +75,7 @@ type TextParser struct { | |
// count and sum of that summary/histogram. | ||
currentIsSummaryCount, currentIsSummarySum bool | ||
currentIsHistogramCount, currentIsHistogramSum bool | ||
currentMetricIsInsideBraces bool | ||
} | ||
|
||
// TextToMetricFamilies reads 'in' as the simple and flat text-based exchange | ||
|
@@ -137,12 +139,14 @@ func (p *TextParser) reset(in io.Reader) { | |
} | ||
p.currentQuantile = math.NaN() | ||
p.currentBucket = math.NaN() | ||
p.currentMF = nil | ||
} | ||
|
||
// startOfLine represents the state where the next byte read from p.buf is the | ||
// start of a line (or whitespace leading up to it). | ||
func (p *TextParser) startOfLine() stateFn { | ||
p.lineCount++ | ||
p.currentMetricIsInsideBraces = false | ||
if p.skipBlankTab(); p.err != nil { | ||
// This is the only place that we expect to see io.EOF, | ||
// which is not an error but the signal that we are done. | ||
|
@@ -158,6 +162,9 @@ func (p *TextParser) startOfLine() stateFn { | |
return p.startComment | ||
case '\n': | ||
return p.startOfLine // Empty line, start the next one. | ||
case '{': | ||
p.currentMetricIsInsideBraces = true | ||
return p.readingLabels | ||
} | ||
return p.readingMetricName | ||
} | ||
|
@@ -275,6 +282,8 @@ func (p *TextParser) startLabelName() stateFn { | |
return nil // Unexpected end of input. | ||
} | ||
if p.currentByte == '}' { | ||
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabel...) | ||
p.currentLabel = nil | ||
if p.skipBlankTab(); p.err != nil { | ||
return nil // Unexpected end of input. | ||
} | ||
|
@@ -287,6 +296,35 @@ func (p *TextParser) startLabelName() stateFn { | |
p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName())) | ||
return nil | ||
} | ||
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil { | ||
return nil // Unexpected end of input. | ||
} | ||
if p.currentByte != '=' { | ||
if p.currentMetricIsInsideBraces { | ||
switch p.currentByte { | ||
case ',': | ||
p.setOrCreateCurrentMF() | ||
p.currentMetric = &dto.Metric{} | ||
return p.startLabelName | ||
case '}': | ||
p.setOrCreateCurrentMF() | ||
p.currentMetric = &dto.Metric{} | ||
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabel...) | ||
p.currentLabel = nil | ||
if p.skipBlankTab(); p.err != nil { | ||
return nil // Unexpected end of input. | ||
} | ||
return p.readingValue | ||
default: | ||
p.parseError(fmt.Sprintf("unexpected end of metric value %q", p.currentByte)) | ||
return nil | ||
} | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All code paths in the |
||
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte)) | ||
p.currentLabel = nil | ||
return nil | ||
} | ||
} | ||
p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())} | ||
if p.currentLabelPair.GetName() == string(model.MetricNameLabel) { | ||
p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel)) | ||
|
@@ -296,23 +334,17 @@ func (p *TextParser) startLabelName() stateFn { | |
// labels to 'real' labels. | ||
if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) && | ||
!(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) { | ||
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair) | ||
} | ||
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil { | ||
return nil // Unexpected end of input. | ||
} | ||
if p.currentByte != '=' { | ||
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte)) | ||
return nil | ||
p.currentLabel = append(p.currentLabel, p.currentLabelPair) | ||
} | ||
// Check for duplicate label names. | ||
labels := make(map[string]struct{}) | ||
for _, l := range p.currentMetric.Label { | ||
for _, l := range p.currentLabel { | ||
lName := l.GetName() | ||
if _, exists := labels[lName]; !exists { | ||
labels[lName] = struct{}{} | ||
} else { | ||
p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName())) | ||
p.currentLabel = nil | ||
return nil | ||
} | ||
} | ||
|
@@ -345,6 +377,7 @@ func (p *TextParser) startLabelValue() stateFn { | |
if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil { | ||
// Create a more helpful error message. | ||
p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue())) | ||
p.currentLabel = nil | ||
return nil | ||
} | ||
} else { | ||
|
@@ -371,12 +404,19 @@ func (p *TextParser) startLabelValue() stateFn { | |
return p.startLabelName | ||
|
||
case '}': | ||
if p.currentMF == nil { | ||
p.parseError("invalid metric name") | ||
return nil | ||
} | ||
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabel...) | ||
p.currentLabel = nil | ||
if p.skipBlankTab(); p.err != nil { | ||
return nil // Unexpected end of input. | ||
} | ||
return p.readingValue | ||
default: | ||
p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue())) | ||
p.currentLabel = nil | ||
return nil | ||
} | ||
} | ||
|
@@ -585,6 +625,8 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) { | |
p.currentToken.WriteByte(p.currentByte) | ||
case 'n': | ||
p.currentToken.WriteByte('\n') | ||
case '"': | ||
p.currentToken.WriteByte('"') | ||
default: | ||
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) | ||
return | ||
|
@@ -610,13 +652,46 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) { | |
// but not into p.currentToken. | ||
func (p *TextParser) readTokenAsMetricName() { | ||
p.currentToken.Reset() | ||
// A UTF-8 metric name must be quoted and may have escaped characters. | ||
quoted := false | ||
escaped := false | ||
if !isValidMetricNameStart(p.currentByte) { | ||
return | ||
} | ||
for { | ||
p.currentToken.WriteByte(p.currentByte) | ||
for p.err == nil { | ||
if escaped { | ||
switch p.currentByte { | ||
case '\\': | ||
p.currentToken.WriteByte(p.currentByte) | ||
case 'n': | ||
p.currentToken.WriteByte('\n') | ||
case '"': | ||
p.currentToken.WriteByte('\\') | ||
p.currentToken.WriteByte('"') | ||
default: | ||
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) | ||
return | ||
} | ||
escaped = false | ||
} else { | ||
switch p.currentByte { | ||
case '"': | ||
quoted = !quoted | ||
if !quoted { | ||
p.currentByte, p.err = p.buf.ReadByte() | ||
return | ||
} | ||
case '\n': | ||
p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String())) | ||
return | ||
case '\\': | ||
escaped = true | ||
default: | ||
p.currentToken.WriteByte(p.currentByte) | ||
} | ||
} | ||
p.currentByte, p.err = p.buf.ReadByte() | ||
if p.err != nil || !isValidMetricNameContinuation(p.currentByte) { | ||
if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') { | ||
return | ||
} | ||
} | ||
|
@@ -628,13 +703,46 @@ func (p *TextParser) readTokenAsMetricName() { | |
// but not into p.currentToken. | ||
func (p *TextParser) readTokenAsLabelName() { | ||
p.currentToken.Reset() | ||
// A UTF-8 label name must be quoted and may have escaped characters. | ||
quoted := false | ||
escaped := false | ||
if !isValidLabelNameStart(p.currentByte) { | ||
return | ||
} | ||
for { | ||
p.currentToken.WriteByte(p.currentByte) | ||
for p.err == nil { | ||
if escaped { | ||
switch p.currentByte { | ||
case '\\': | ||
p.currentToken.WriteByte(p.currentByte) | ||
case 'n': | ||
p.currentToken.WriteByte('\n') | ||
case '"': | ||
p.currentToken.WriteByte('\\') | ||
p.currentToken.WriteByte('"') | ||
default: | ||
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) | ||
return | ||
} | ||
escaped = false | ||
} else { | ||
switch p.currentByte { | ||
case '"': | ||
quoted = !quoted | ||
if !quoted { | ||
p.currentByte, p.err = p.buf.ReadByte() | ||
return | ||
} | ||
case '\n': | ||
p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String())) | ||
return | ||
case '\\': | ||
escaped = true | ||
default: | ||
p.currentToken.WriteByte(p.currentByte) | ||
} | ||
} | ||
p.currentByte, p.err = p.buf.ReadByte() | ||
if p.err != nil || !isValidLabelNameContinuation(p.currentByte) { | ||
if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') { | ||
return | ||
} | ||
} | ||
|
@@ -660,6 +768,7 @@ func (p *TextParser) readTokenAsLabelValue() { | |
p.currentToken.WriteByte('\n') | ||
default: | ||
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) | ||
p.currentLabel = nil | ||
return | ||
} | ||
escaped = false | ||
|
@@ -718,19 +827,19 @@ func (p *TextParser) setOrCreateCurrentMF() { | |
} | ||
|
||
func isValidLabelNameStart(b byte) bool { | ||
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' | ||
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '"' | ||
} | ||
|
||
func isValidLabelNameContinuation(b byte) bool { | ||
return isValidLabelNameStart(b) || (b >= '0' && b <= '9') | ||
func isValidLabelNameContinuation(b byte, quoted bool) bool { | ||
return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b))) | ||
} | ||
|
||
func isValidMetricNameStart(b byte) bool { | ||
return isValidLabelNameStart(b) || b == ':' | ||
} | ||
|
||
func isValidMetricNameContinuation(b byte) bool { | ||
return isValidLabelNameContinuation(b) || b == ':' | ||
func isValidMetricNameContinuation(b byte, quoted bool) bool { | ||
return isValidLabelNameContinuation(b, quoted) || b == ':' | ||
} | ||
|
||
func isBlankOrTab(b byte) bool { | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know that protobuf uses the singular even for repeated messages. But maybe it would still make more sense and be more readable to use the plural here?
currentLabels
collides with the map of the same name below, but maybe call thiscurrentLabelPairs
? Also, is there a good comment to add here to explain what this variable is for? (cf. the comment for thecurrentLabels
variable below).