diff --git a/docs/sources/shared/configuration.md b/docs/sources/shared/configuration.md
index 208def0cdd52..674bb09ff0b9 100644
--- a/docs/sources/shared/configuration.md
+++ b/docs/sources/shared/configuration.md
@@ -2925,8 +2925,11 @@ The `limits_config` block configures global and per-tenant limits in Loki. The v
 [discover_service_name: <list of string> | default = [service app application name app_kubernetes_io_name container container_name component workload job]]
 
 # Discover and add log levels during ingestion, if not present already. Levels
-# would be added to Structured Metadata with name 'level' and one of the values
-# from 'debug', 'info', 'warn', 'error', 'critical', 'fatal'.
+# are added to Structured Metadata with the name 'detected_level'. Detection
+# checks stream labels and structured metadata for the keys
+# level/LEVEL/Level/Severity/severity/SEVERITY/lvl/LVL/Lvl (case-sensitive),
+# then falls back to parsing the log line for one of the values 'trace',
+# 'debug', 'info', 'warn', 'error', 'critical', 'fatal' (case-insensitive).
 # CLI flag: -validation.discover-log-levels
 [discover_log_levels: <boolean> | default = true]
diff --git a/go.mod b/go.mod
index 06969fdeed2e..f1779c1be01b 100644
--- a/go.mod
+++ b/go.mod
@@ -119,6 +119,7 @@ require (
 	github.com/IBM/go-sdk-core/v5 v5.13.1
 	github.com/IBM/ibm-cos-sdk-go v1.10.0
 	github.com/axiomhq/hyperloglog v0.0.0-20240124082744-24bca3a5b39b
+	github.com/buger/jsonparser v1.1.1
 	github.com/d4l3k/messagediff v1.2.1
 	github.com/dolthub/swiss v0.2.1
 	github.com/efficientgo/core v1.0.0-rc.2
diff --git a/go.sum b/go.sum
index a873e59ac8dd..df44df356fc8 100644
--- a/go.sum
+++ b/go.sum
@@ -401,6 +401,8 @@ github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9
 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
 github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
+github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
+github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
 github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b h1:6+ZFm0flnudZzdSE0JxlhR2hKnGPcNB35BjQf4RYQDY=
 github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
 github.com/caddyserver/caddy v1.0.4/go.mod h1:uruyfVsyMcDb3IOzSKsi1x0wOjy1my/PxOSTcD+24jM=
diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go
index eae29a57c905..268db96e897a 100644
--- a/pkg/distributor/distributor.go
+++ b/pkg/distributor/distributor.go
@@ -12,8 +12,10 @@ import (
 	"time"
 	"unicode"
 
+	"github.com/buger/jsonparser"
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
+	"github.com/go-logfmt/logfmt"
 	"github.com/gogo/status"
 	"github.com/prometheus/prometheus/model/labels"
 	"go.opentelemetry.io/collector/pdata/plog"
@@ -59,13 +61,15 @@ const (
 	labelServiceName = "service_name"
 	serviceUnknown   = "unknown_service"
-	labelLevel       = "level"
+	levelLabel       = "detected_level"
 	logLevelDebug    = "debug"
 	logLevelInfo     = "info"
 	logLevelWarn     = "warn"
 	logLevelError    = "error"
 	logLevelFatal    = "fatal"
 	logLevelCritical = "critical"
+	logLevelTrace    = "trace"
+	logLevelUnknown  = "unknown"
 )
 
 var (
@@ -73,6 +77,12 @@ var (
 	rfStats = analytics.NewInt("distributor_replication_factor")
 )
 
+var allowedLabelsForLevel = map[string]struct{}{
+	"level": {}, "LEVEL": {}, "Level": {},
+	"severity": {}, "SEVERITY": {}, "Severity": {},
+	"lvl": {}, "LVL": {}, "Lvl": {},
+}
+
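The new `allowedLabelsForLevel` set is consulted case-sensitively, first against the stream's labels and then against each entry's structured metadata, before the distributor falls back to sniffing the log line itself. Below is a minimal, self-contained sketch of that precedence; `resolveLevel` is a hypothetical name used for illustration only (in the PR the logic is inlined in `Push`, using the `hasAnyLevelLabels` helper added further down in this diff).

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

var allowedLabelsForLevel = map[string]struct{}{
	"level": {}, "LEVEL": {}, "Level": {},
	"severity": {}, "SEVERITY": {}, "Severity": {},
	"lvl": {}, "LVL": {}, "Lvl": {},
}

// hasAnyLevelLabels returns the value of the first allowed level key found
// in the label set, mirroring the helper this PR adds to the distributor.
func hasAnyLevelLabels(l labels.Labels) (string, bool) {
	for lbl := range allowedLabelsForLevel {
		if l.Has(lbl) {
			return l.Get(lbl), true
		}
	}
	return "", false
}

// resolveLevel condenses the per-entry precedence applied in Push:
// stream labels win, then structured metadata, then line inspection.
func resolveLevel(streamLbs, metadata labels.Labels, detectFromLine func() string) string {
	if lvl, ok := hasAnyLevelLabels(streamLbs); ok {
		return lvl
	}
	if lvl, ok := hasAnyLevelLabels(metadata); ok {
		return lvl
	}
	return detectFromLine()
}

func main() {
	stream := labels.FromStrings("app", "checkout", "severity", "error")
	meta := labels.FromStrings("traceID", "abc123")
	// Prints "error": the stream label short-circuits line detection.
	fmt.Println(resolveLevel(stream, meta, func() string { return "unknown" }))
}
```

 // Config for a Distributor.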
type Config struct { // Distributors ring @@ -376,7 +386,9 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log n := 0 pushSize := 0 prevTs := stream.Entries[0].Timestamp - addLogLevel := validationContext.allowStructuredMetadata && validationContext.discoverLogLevels && !lbs.Has(labelLevel) + + shouldDiscoverLevels := validationContext.allowStructuredMetadata && validationContext.discoverLogLevels + levelFromLabel, hasLevelLabel := hasAnyLevelLabels(lbs) for _, entry := range stream.Entries { if err := d.validator.ValidateEntry(ctx, validationContext, lbs, entry); err != nil { d.writeFailuresManager.Log(tenantID, err) @@ -385,12 +397,21 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log } structuredMetadata := logproto.FromLabelAdaptersToLabels(entry.StructuredMetadata) - if addLogLevel && !structuredMetadata.Has(labelLevel) { - logLevel := detectLogLevelFromLogEntry(entry, structuredMetadata) - entry.StructuredMetadata = append(entry.StructuredMetadata, logproto.LabelAdapter{ - Name: labelLevel, - Value: logLevel, - }) + if shouldDiscoverLevels { + var logLevel string + if hasLevelLabel { + logLevel = levelFromLabel + } else if levelFromMetadata, ok := hasAnyLevelLabels(structuredMetadata); ok { + logLevel = levelFromMetadata + } else { + logLevel = detectLogLevelFromLogEntry(entry, structuredMetadata) + } + if logLevel != logLevelUnknown && logLevel != "" { + entry.StructuredMetadata = append(entry.StructuredMetadata, logproto.LabelAdapter{ + Name: levelLabel, + Value: logLevel, + }) + } } stream.Entries[n] = entry @@ -537,6 +558,15 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log } } +func hasAnyLevelLabels(l labels.Labels) (string, bool) { + for lbl := range allowedLabelsForLevel { + if l.Has(lbl) { + return l.Get(lbl), true + } + } + return "", false +} + // shardStream shards (divides) the given stream into N smaller streams, where // N is the sharding size for the given stream. shardSteam returns the smaller // streams and their associated keys for hashing to ingesters. @@ -865,7 +895,11 @@ func detectLogLevelFromLogEntry(entry logproto.Entry, structuredMetadata labels. if err != nil { return logLevelInfo } - if otlpSeverityNumber <= int(plog.SeverityNumberDebug4) { + if otlpSeverityNumber == int(plog.SeverityNumberUnspecified) { + return logLevelUnknown + } else if otlpSeverityNumber <= int(plog.SeverityNumberTrace4) { + return logLevelTrace + } else if otlpSeverityNumber <= int(plog.SeverityNumberDebug4) { return logLevelDebug } else if otlpSeverityNumber <= int(plog.SeverityNumberInfo4) { return logLevelInfo @@ -876,18 +910,68 @@ func detectLogLevelFromLogEntry(entry logproto.Entry, structuredMetadata labels. 
} else if otlpSeverityNumber <= int(plog.SeverityNumberFatal4) { return logLevelFatal } - return logLevelInfo + return logLevelUnknown } return extractLogLevelFromLogLine(entry.Line) } func extractLogLevelFromLogLine(log string) string { - // check for log levels in known log formats to avoid any false detection + var v string + if isJSON(log) { + v = getValueUsingJSONParser(log) + } else { + v = getValueUsingLogfmtParser(log) + } - // json logs: + switch strings.ToLower(v) { + case "trace", "trc": + return logLevelTrace + case "debug", "dbg": + return logLevelDebug + case "info", "inf": + return logLevelInfo + case "warn", "wrn": + return logLevelWarn + case "error", "err": + return logLevelError + case "critical": + return logLevelCritical + case "fatal": + return logLevelFatal + default: + return detectLevelFromLogLine(log) + } +} + +func getValueUsingLogfmtParser(line string) string { + equalIndex := strings.Index(line, "=") + if len(line) == 0 || equalIndex == -1 { + return logLevelUnknown + } + d := logfmt.NewDecoder(strings.NewReader(line)) + d.ScanRecord() + for d.ScanKeyval() { + if _, ok := allowedLabelsForLevel[string(d.Key())]; ok { + return string(d.Value()) + } + } + return logLevelUnknown +} + +func getValueUsingJSONParser(log string) string { + for allowedLabel := range allowedLabelsForLevel { + l, err := jsonparser.GetString([]byte(log), allowedLabel) + if err == nil { + return l + } + } + return logLevelUnknown +} + +func isJSON(line string) bool { var firstNonSpaceChar rune - for _, char := range log { + for _, char := range line { if !unicode.IsSpace(char) { firstNonSpaceChar = char break @@ -895,55 +979,18 @@ func extractLogLevelFromLogLine(log string) string { } var lastNonSpaceChar rune - for i := len(log) - 1; i >= 0; i-- { - char := rune(log[i]) + for i := len(line) - 1; i >= 0; i-- { + char := rune(line[i]) if !unicode.IsSpace(char) { lastNonSpaceChar = char break } } - if firstNonSpaceChar == '{' && lastNonSpaceChar == '}' { - if strings.Contains(log, `:"err"`) || strings.Contains(log, `:"ERR"`) || - strings.Contains(log, `:"error"`) || strings.Contains(log, `:"ERROR"`) { - return logLevelError - } - if strings.Contains(log, `:"warn"`) || strings.Contains(log, `:"WARN"`) || - strings.Contains(log, `:"warning"`) || strings.Contains(log, `:"WARNING"`) { - return logLevelWarn - } - if strings.Contains(log, `:"critical"`) || strings.Contains(log, `:"CRITICAL"`) { - return logLevelCritical - } - if strings.Contains(log, `:"debug"`) || strings.Contains(log, `:"DEBUG"`) { - return logLevelDebug - } - if strings.Contains(log, `:"info"`) || strings.Contains(log, `:"INFO"`) { - return logLevelInfo - } - } - - // logfmt logs: - if strings.Contains(log, "=") { - if strings.Contains(log, "=err") || strings.Contains(log, "=ERR") || - strings.Contains(log, "=error") || strings.Contains(log, "=ERROR") { - return logLevelError - } - if strings.Contains(log, "=warn") || strings.Contains(log, "=WARN") || - strings.Contains(log, "=warning") || strings.Contains(log, "=WARNING") { - return logLevelWarn - } - if strings.Contains(log, "=critical") || strings.Contains(log, "=CRITICAL") { - return logLevelCritical - } - if strings.Contains(log, "=debug") || strings.Contains(log, "=DEBUG") { - return logLevelDebug - } - if strings.Contains(log, "=info") || strings.Contains(log, "=INFO") { - return logLevelInfo - } - } + return firstNonSpaceChar == '{' && lastNonSpaceChar == '}' +} +func detectLevelFromLogLine(log string) string { if strings.Contains(log, "err:") || strings.Contains(log, 
"ERR:") || strings.Contains(log, "error") || strings.Contains(log, "ERROR") { return logLevelError @@ -958,7 +1005,5 @@ func extractLogLevelFromLogLine(log string) string { if strings.Contains(log, "debug:") || strings.Contains(log, "DEBUG:") { return logLevelDebug } - - // Default to info if no specific level is found - return logLevelInfo + return logLevelUnknown } diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index f68e5f3a701a..dc58f758835c 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -1364,6 +1364,29 @@ func prepare(t *testing.T, numDistributors, numIngesters int, limits *validation return distributors, ingesters } +func makeWriteRequestWithLabelsWithLevel(lines, size int, labels []string, level string) *logproto.PushRequest { + streams := make([]logproto.Stream, len(labels)) + for i := 0; i < len(labels); i++ { + stream := logproto.Stream{Labels: labels[i]} + + for j := 0; j < lines; j++ { + // Construct the log line, honoring the input size + line := "msg=" + strconv.Itoa(j) + strings.Repeat("0", size) + " severity=" + level + + stream.Entries = append(stream.Entries, logproto.Entry{ + Timestamp: time.Now().Add(time.Duration(j) * time.Millisecond), + Line: line, + }) + } + + streams[i] = stream + } + + return &logproto.PushRequest{ + Streams: streams, + } +} + func makeWriteRequestWithLabels(lines, size int, labels []string) *logproto.PushRequest { streams := make([]logproto.Stream, len(labels)) for i := 0; i < len(labels); i++ { @@ -1536,7 +1559,7 @@ func Test_DetectLogLevels(t *testing.T) { require.Len(t, topVal.Streams[0].Entries[0].StructuredMetadata, 0) }) - t.Run("log level detection enabled", func(t *testing.T) { + t.Run("log level detection enabled but level cannot be detected", func(t *testing.T) { limits, ingester := setup(true) distributors, _ := prepare(t, 1, 5, limits, func(addr string) (ring_client.PoolClient, error) { return ingester, nil }) @@ -1545,10 +1568,22 @@ func Test_DetectLogLevels(t *testing.T) { require.NoError(t, err) topVal := ingester.Peek() require.Equal(t, `{foo="bar"}`, topVal.Streams[0].Labels) + require.Len(t, topVal.Streams[0].Entries[0].StructuredMetadata, 0) + }) + + t.Run("log level detection enabled and warn logs", func(t *testing.T) { + limits, ingester := setup(true) + distributors, _ := prepare(t, 1, 5, limits, func(addr string) (ring_client.PoolClient, error) { return ingester, nil }) + + writeReq := makeWriteRequestWithLabelsWithLevel(1, 10, []string{`{foo="bar"}`}, "warn") + _, err := distributors[0].Push(ctx, writeReq) + require.NoError(t, err) + topVal := ingester.Peek() + require.Equal(t, `{foo="bar"}`, topVal.Streams[0].Labels) require.Equal(t, push.LabelsAdapter{ { - Name: labelLevel, - Value: logLevelInfo, + Name: levelLabel, + Value: logLevelWarn, }, }, topVal.Streams[0].Entries[0].StructuredMetadata) }) @@ -1562,7 +1597,10 @@ func Test_DetectLogLevels(t *testing.T) { require.NoError(t, err) topVal := ingester.Peek() require.Equal(t, `{foo="bar", level="debug"}`, topVal.Streams[0].Labels) - require.Len(t, topVal.Streams[0].Entries[0].StructuredMetadata, 0) + sm := topVal.Streams[0].Entries[0].StructuredMetadata + require.Len(t, sm, 1) + require.Equal(t, sm[0].Name, levelLabel) + require.Equal(t, sm[0].Value, logLevelDebug) }) t.Run("log level detection enabled but log level already present as structured metadata", func(t *testing.T) { @@ -1572,7 +1610,7 @@ func Test_DetectLogLevels(t *testing.T) { writeReq := makeWriteRequestWithLabels(1, 10, 
[]string{`{foo="bar"}`})
 		writeReq.Streams[0].Entries[0].StructuredMetadata = push.LabelsAdapter{
 			{
-				Name:  labelLevel,
+				Name:  "severity",
 				Value: logLevelWarn,
 			},
 		}
@@ -1580,12 +1618,16 @@
 		require.NoError(t, err)
 		topVal := ingester.Peek()
 		require.Equal(t, `{foo="bar"}`, topVal.Streams[0].Labels)
+		sm := topVal.Streams[0].Entries[0].StructuredMetadata
 		require.Equal(t, push.LabelsAdapter{
 			{
-				Name:  labelLevel,
+				Name:  "severity",
+				Value: logLevelWarn,
+			}, {
+				Name:  levelLabel,
 				Value: logLevelWarn,
 			},
-		}, topVal.Streams[0].Entries[0].StructuredMetadata)
+		}, sm)
 	})
 }
 
@@ -1625,7 +1667,7 @@ func Test_detectLogLevelFromLogEntry(t *testing.T) {
 			entry: logproto.Entry{
 				Line: "foo",
 			},
-			expectedLogLevel: logLevelInfo,
+			expectedLogLevel: logLevelUnknown,
 		},
 		{
 			name: "non otlp with log level keywords in log line",
@@ -1637,10 +1679,38 @@
 		{
 			name: "json log line with an error",
 			entry: logproto.Entry{
-				Line: `{"foo":"bar",msg:"message with keyword error but it should not get picked up",level":"critical"}`,
+				Line: `{"foo":"bar","msg":"message with keyword error but it should not get picked up","level":"critical"}`,
 			},
 			expectedLogLevel: logLevelCritical,
 		},
+		{
+			name: "json log line with an error in upper case keys",
+			entry: logproto.Entry{
+				Line: `{"FOO":"bar","MSG":"message with keyword error but it should not get picked up","LEVEL":"Critical"}`,
+			},
+			expectedLogLevel: logLevelCritical,
+		},
+		{
+			name: "json log line with a warning",
+			entry: logproto.Entry{
+				Line: `{"foo":"bar","msg":"message with keyword warn but it should not get picked up","level":"warn"}`,
+			},
+			expectedLogLevel: logLevelWarn,
+		},
+		{
+			name: "json log line with a fatal severity",
+			entry: logproto.Entry{
+				Line: `{"foo":"bar","msg":"message with keyword warn but it should not get picked up","SEVERITY":"FATAL"}`,
+			},
+			expectedLogLevel: logLevelFatal,
+		},
+		{
+			name: "json log line with an abbreviated upper case error level",
+			entry: logproto.Entry{
+				Line: `{"foo":"bar","msg":"message with keyword warn but it should not get picked up","level":"ERR"}`,
+			},
+			expectedLogLevel: logLevelError,
+		},
 		{
 			name: "logfmt log line with a warn",
 			entry: logproto.Entry{
@@ -1648,6 +1718,55 @@
 			},
 			expectedLogLevel: logLevelWarn,
 		},
+		{
+			name: "logfmt log line with a warn with camel case",
+			entry: logproto.Entry{
+				Line: `foo=bar msg="message with keyword error but it should not get picked up" level=Warn`,
+			},
+			expectedLogLevel: logLevelWarn,
+		},
+		{
+			name: "logfmt log line with a trace",
+			entry: logproto.Entry{
+				Line: `foo=bar msg="message with keyword error but it should not get picked up" level=Trace`,
+			},
+			expectedLogLevel: logLevelTrace,
+		},
+		{
+			name: "logfmt log line with some other level returns unknown log level",
+			entry: logproto.Entry{
+				Line: `foo=bar msg="message with keyword but it should not get picked up" level=NA`,
+			},
+			expectedLogLevel: logLevelUnknown,
+		},
+		{
+			name: "logfmt log line with label severity is allowed for level detection",
+			entry: logproto.Entry{
+				Line: `foo=bar msg="message with keyword but it should not get picked up" severity=critical`,
+			},
+			expectedLogLevel: logLevelCritical,
+		},
+		{
+			name: "logfmt log line with label Severity with camel case is allowed for level detection",
+			entry: logproto.Entry{
+				Line: `Foo=bar MSG="Message with keyword but it should not get picked up" Severity=critical`,
+			},
+			expectedLogLevel: logLevelCritical,
+		},
+		{
+			name: "logfmt log line with an info level in mixed case",
+			entry: logproto.Entry{
+				Line: `foo=bar msg="message with keyword error but it should not get picked up" level=inFO`,
+			},
+			expectedLogLevel: logLevelInfo,
+		},
+		{
+			name: "logfmt log line with upper case keys and a mixed case info level",
+			entry: logproto.Entry{
+				Line: `FOO=bar MSG="message with keyword error but it should not get picked up" LEVEL=inFO`,
+			},
+			expectedLogLevel: logLevelInfo,
+		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
 			detectedLogLevel := detectLogLevelFromLogEntry(tc.entry, logproto.FromLabelAdaptersToLabels(tc.entry.StructuredMetadata))
@@ -1669,6 +1788,24 @@ func Benchmark_extractLogLevelFromLogLine(b *testing.B) {
 		"RJtBuW ABOqQHLSlNuUw ZlM2nGS2 jwA7cXEOJhY 3oPv4gGAz Uqdre16MF92C06jOH dayqTCK8XmIilT uvgywFSfNadYvRDQa " +
 		"iUbswJNcwqcr6huw LAGrZS8NGlqqzcD2wFU rm Uqcrh3TKLUCkfkwLm 5CIQbxMCUz boBrEHxvCBrUo YJoF2iyif4xq3q yk "
 
+	for i := 0; i < b.N; i++ {
+		level := extractLogLevelFromLogLine(logLine)
+		require.Equal(b, logLevelUnknown, level)
+	}
+}
+
+func Benchmark_optParseExtractLogLevelFromLogLineJson(b *testing.B) {
+	logLine := `{"msg": "something" , "level": "error", "id": "1"}`
+
+	for i := 0; i < b.N; i++ {
+		level := extractLogLevelFromLogLine(logLine)
+		require.Equal(b, logLevelError, level)
+	}
+}
+
+func Benchmark_optParseExtractLogLevelFromLogLineLogfmt(b *testing.B) {
+	logLine := `FOO=bar MSG="message with keyword error but it should not get picked up" LEVEL=inFO`
+
 	for i := 0; i < b.N; i++ {
 		level := extractLogLevelFromLogLine(logLine)
 		require.Equal(b, logLevelInfo, level)
diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go
index 036f5660c092..27e6702dc988 100644
--- a/pkg/validation/limits.go
+++ b/pkg/validation/limits.go
@@ -258,7 +258,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
 		"job",
 	}
 	f.Var((*dskit_flagext.StringSlice)(&l.DiscoverServiceName), "validation.discover-service-name", "If no service_name label exists, Loki maps a single label from the configured list to service_name. If none of the configured labels exist in the stream, label is set to unknown_service. Empty list disables setting the label.")
-	f.BoolVar(&l.DiscoverLogLevels, "validation.discover-log-levels", true, "Discover and add log levels during ingestion, if not present already. Levels would be added to Structured Metadata with name 'level' and one of the values from 'debug', 'info', 'warn', 'error', 'critical', 'fatal'.")
+	f.BoolVar(&l.DiscoverLogLevels, "validation.discover-log-levels", true, "Discover and add log levels during ingestion, if not present already. Levels are added to Structured Metadata with the name 'detected_level'. Detection checks stream labels and structured metadata for the keys level/LEVEL/Level/Severity/severity/SEVERITY/lvl/LVL/Lvl (case-sensitive), then falls back to parsing the log line for one of the values 'trace', 'debug', 'info', 'warn', 'error', 'critical', 'fatal' (case-insensitive).")
 
 	_ = l.RejectOldSamplesMaxAge.Set("7d")
 	f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.")
diff --git a/vendor/github.com/buger/jsonparser/.gitignore b/vendor/github.com/buger/jsonparser/.gitignore
new file mode 100644
index 000000000000..5598d8a5691a
--- /dev/null
+++ b/vendor/github.com/buger/jsonparser/.gitignore
@@ -0,0 +1,12 @@
+
+*.test
+
+*.out
+
+*.mprof
+
+.idea
+
+vendor/github.com/buger/goterm/
+prof.cpu
+prof.mem
diff --git a/vendor/github.com/buger/jsonparser/.travis.yml b/vendor/github.com/buger/jsonparser/.travis.yml
new file mode 100644
index 000000000000..dbfb7cf98830
--- /dev/null
+++ b/vendor/github.com/buger/jsonparser/.travis.yml
@@ -0,0 +1,11 @@
+language: go
+arch:
+  - amd64
+  - ppc64le
+go:
+  - 1.7.x
+  - 1.8.x
+  - 1.9.x
+  - 1.10.x
+  - 1.11.x
+script: go test -v ./.
diff --git a/vendor/github.com/buger/jsonparser/Dockerfile b/vendor/github.com/buger/jsonparser/Dockerfile
new file mode 100644
index 000000000000..37fc9fd0b4d6
--- /dev/null
+++ b/vendor/github.com/buger/jsonparser/Dockerfile
@@ -0,0 +1,12 @@
+FROM golang:1.6
+
+RUN go get github.com/Jeffail/gabs
+RUN go get github.com/bitly/go-simplejson
+RUN go get github.com/pquerna/ffjson
+RUN go get github.com/antonholmquist/jason
+RUN go get github.com/mreiferson/go-ujson
+RUN go get -tags=unsafe -u github.com/ugorji/go/codec
+RUN go get github.com/mailru/easyjson
+
+WORKDIR /go/src/github.com/buger/jsonparser
+ADD . /go/src/github.com/buger/jsonparser
\ No newline at end of file
diff --git a/vendor/github.com/buger/jsonparser/LICENSE b/vendor/github.com/buger/jsonparser/LICENSE
new file mode 100644
index 000000000000..ac25aeb7da28
--- /dev/null
+++ b/vendor/github.com/buger/jsonparser/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Leonid Bugaev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/buger/jsonparser/Makefile b/vendor/github.com/buger/jsonparser/Makefile
new file mode 100644
index 000000000000..e843368cf103
--- /dev/null
+++ b/vendor/github.com/buger/jsonparser/Makefile
@@ -0,0 +1,36 @@
+SOURCE = parser.go
+CONTAINER = jsonparser
+SOURCE_PATH = /go/src/github.com/buger/jsonparser
+BENCHMARK = JsonParser
+BENCHTIME = 5s
+TEST = .
+DRUN = docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) + +build: + docker build -t $(CONTAINER) . + +race: + $(DRUN) --env GORACE="halt_on_error=1" go test ./. $(ARGS) -v -race -timeout 15s + +bench: + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -benchtime $(BENCHTIME) -v + +bench_local: + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench . $(ARGS) -benchtime $(BENCHTIME) -v + +profile: + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -memprofile mem.mprof -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -cpuprofile cpu.out -v + $(DRUN) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -c + +test: + $(DRUN) go test $(LDFLAGS) ./ -run $(TEST) -timeout 10s $(ARGS) -v + +fmt: + $(DRUN) go fmt ./... + +vet: + $(DRUN) go vet ./. + +bash: + $(DRUN) /bin/bash \ No newline at end of file diff --git a/vendor/github.com/buger/jsonparser/README.md b/vendor/github.com/buger/jsonparser/README.md new file mode 100644 index 000000000000..d7e0ec397aff --- /dev/null +++ b/vendor/github.com/buger/jsonparser/README.md @@ -0,0 +1,365 @@ +[![Go Report Card](https://goreportcard.com/badge/github.com/buger/jsonparser)](https://goreportcard.com/report/github.com/buger/jsonparser) ![License](https://img.shields.io/dub/l/vibe-d.svg) +# Alternative JSON parser for Go (10x times faster standard library) + +It does not require you to know the structure of the payload (eg. create structs), and allows accessing fields by providing the path to them. It is up to **10 times faster** than standard `encoding/json` package (depending on payload size and usage), **allocates no memory**. See benchmarks below. + +## Rationale +Originally I made this for a project that relies on a lot of 3rd party APIs that can be unpredictable and complex. +I love simplicity and prefer to avoid external dependecies. `encoding/json` requires you to know exactly your data structures, or if you prefer to use `map[string]interface{}` instead, it will be very slow and hard to manage. +I investigated what's on the market and found that most libraries are just wrappers around `encoding/json`, there is few options with own parsers (`ffjson`, `easyjson`), but they still requires you to create data structures. + + +Goal of this project is to push JSON parser to the performance limits and not sacrifice with compliance and developer user experience. + +## Example +For the given JSON our goal is to extract the user's full name, number of github followers and avatar. + +```go +import "github.com/buger/jsonparser" + +... 
+ +data := []byte(`{ + "person": { + "name": { + "first": "Leonid", + "last": "Bugaev", + "fullName": "Leonid Bugaev" + }, + "github": { + "handle": "buger", + "followers": 109 + }, + "avatars": [ + { "url": "https://avatars1.githubusercontent.com/u/14009?v=3&s=460", "type": "thumbnail" } + ] + }, + "company": { + "name": "Acme" + } +}`) + +// You can specify key path by providing arguments to Get function +jsonparser.Get(data, "person", "name", "fullName") + +// There is `GetInt` and `GetBoolean` helpers if you exactly know key data type +jsonparser.GetInt(data, "person", "github", "followers") + +// When you try to get object, it will return you []byte slice pointer to data containing it +// In `company` it will be `{"name": "Acme"}` +jsonparser.Get(data, "company") + +// If the key doesn't exist it will throw an error +var size int64 +if value, err := jsonparser.GetInt(data, "company", "size"); err == nil { + size = value +} + +// You can use `ArrayEach` helper to iterate items [item1, item2 .... itemN] +jsonparser.ArrayEach(data, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { + fmt.Println(jsonparser.Get(value, "url")) +}, "person", "avatars") + +// Or use can access fields by index! +jsonparser.GetString(data, "person", "avatars", "[0]", "url") + +// You can use `ObjectEach` helper to iterate objects { "key1":object1, "key2":object2, .... "keyN":objectN } +jsonparser.ObjectEach(data, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error { + fmt.Printf("Key: '%s'\n Value: '%s'\n Type: %s\n", string(key), string(value), dataType) + return nil +}, "person", "name") + +// The most efficient way to extract multiple keys is `EachKey` + +paths := [][]string{ + []string{"person", "name", "fullName"}, + []string{"person", "avatars", "[0]", "url"}, + []string{"company", "url"}, +} +jsonparser.EachKey(data, func(idx int, value []byte, vt jsonparser.ValueType, err error){ + switch idx { + case 0: // []string{"person", "name", "fullName"} + ... + case 1: // []string{"person", "avatars", "[0]", "url"} + ... + case 2: // []string{"company", "url"}, + ... + } +}, paths...) + +// For more information see docs below +``` + +## Need to speedup your app? + +I'm available for consulting and can help you push your app performance to the limits. Ping me at: leonsbox@gmail.com. + +## Reference + +Library API is really simple. You just need the `Get` method to perform any operation. The rest is just helpers around it. + +You also can view API at [godoc.org](https://godoc.org/github.com/buger/jsonparser) + + +### **`Get`** +```go +func Get(data []byte, keys ...string) (value []byte, dataType jsonparser.ValueType, offset int, err error) +``` +Receives data structure, and key path to extract value from. + +Returns: +* `value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error +* `dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null` +* `offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper. +* `err` - If the key is not found or any other parsing issue, it should return error. If key not found it also sets `dataType` to `NotExist` + +Accepts multiple keys to specify path to JSON value (in case of quering nested structures). +If no keys are provided it will try to extract the closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation. 
+ +Note that keys can be an array indexes: `jsonparser.GetInt("person", "avatars", "[0]", "url")`, pretty cool, yeah? + +### **`GetString`** +```go +func GetString(data []byte, keys ...string) (val string, err error) +``` +Returns strings properly handing escaped and unicode characters. Note that this will cause additional memory allocations. + +### **`GetUnsafeString`** +If you need string in your app, and ready to sacrifice with support of escaped symbols in favor of speed. It returns string mapped to existing byte slice memory, without any allocations: +```go +s, _, := jsonparser.GetUnsafeString(data, "person", "name", "title") +switch s { + case 'CEO': + ... + case 'Engineer' + ... + ... +} +``` +Note that `unsafe` here means that your string will exist until GC will free underlying byte slice, for most of cases it means that you can use this string only in current context, and should not pass it anywhere externally: through channels or any other way. + + +### **`GetBoolean`**, **`GetInt`** and **`GetFloat`** +```go +func GetBoolean(data []byte, keys ...string) (val bool, err error) + +func GetFloat(data []byte, keys ...string) (val float64, err error) + +func GetInt(data []byte, keys ...string) (val int64, err error) +``` +If you know the key type, you can use the helpers above. +If key data type do not match, it will return error. + +### **`ArrayEach`** +```go +func ArrayEach(data []byte, cb func(value []byte, dataType jsonparser.ValueType, offset int, err error), keys ...string) +``` +Needed for iterating arrays, accepts a callback function with the same return arguments as `Get`. + +### **`ObjectEach`** +```go +func ObjectEach(data []byte, callback func(key []byte, value []byte, dataType ValueType, offset int) error, keys ...string) (err error) +``` +Needed for iterating object, accepts a callback function. Example: +```go +var handler func([]byte, []byte, jsonparser.ValueType, int) error +handler = func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error { + //do stuff here +} +jsonparser.ObjectEach(myJson, handler) +``` + + +### **`EachKey`** +```go +func EachKey(data []byte, cb func(idx int, value []byte, dataType jsonparser.ValueType, err error), paths ...[]string) +``` +When you need to read multiple keys, and you do not afraid of low-level API `EachKey` is your friend. It read payload only single time, and calls callback function once path is found. For example when you call multiple times `Get`, it has to process payload multiple times, each time you call it. Depending on payload `EachKey` can be multiple times faster than `Get`. Path can use nested keys as well! + +```go +paths := [][]string{ + []string{"uuid"}, + []string{"tz"}, + []string{"ua"}, + []string{"st"}, +} +var data SmallPayload + +jsonparser.EachKey(smallFixture, func(idx int, value []byte, vt jsonparser.ValueType, err error){ + switch idx { + case 0: + data.Uuid, _ = value + case 1: + v, _ := jsonparser.ParseInt(value) + data.Tz = int(v) + case 2: + data.Ua, _ = value + case 3: + v, _ := jsonparser.ParseInt(value) + data.St = int(v) + } +}, paths...) +``` + +### **`Set`** +```go +func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) +``` +Receives existing data structure, key path to set, and value to set at that key. *This functionality is experimental.* + +Returns: +* `value` - Pointer to original data structure with updated or added key value. +* `err` - If any parsing issue, it should return error. 
+ +Accepts multiple keys to specify path to JSON value (in case of updating or creating nested structures). + +Note that keys can be an array indexes: `jsonparser.Set(data, []byte("http://github.com"), "person", "avatars", "[0]", "url")` + +### **`Delete`** +```go +func Delete(data []byte, keys ...string) value []byte +``` +Receives existing data structure, and key path to delete. *This functionality is experimental.* + +Returns: +* `value` - Pointer to original data structure with key path deleted if it can be found. If there is no key path, then the whole data structure is deleted. + +Accepts multiple keys to specify path to JSON value (in case of updating or creating nested structures). + +Note that keys can be an array indexes: `jsonparser.Delete(data, "person", "avatars", "[0]", "url")` + + +## What makes it so fast? +* It does not rely on `encoding/json`, `reflection` or `interface{}`, the only real package dependency is `bytes`. +* Operates with JSON payload on byte level, providing you pointers to the original data structure: no memory allocation. +* No automatic type conversions, by default everything is a []byte, but it provides you value type, so you can convert by yourself (there is few helpers included). +* Does not parse full record, only keys you specified + + +## Benchmarks + +There are 3 benchmark types, trying to simulate real-life usage for small, medium and large JSON payloads. +For each metric, the lower value is better. Time/op is in nanoseconds. Values better than standard encoding/json marked as bold text. +Benchmarks run on standard Linode 1024 box. + +Compared libraries: +* https://golang.org/pkg/encoding/json +* https://github.com/Jeffail/gabs +* https://github.com/a8m/djson +* https://github.com/bitly/go-simplejson +* https://github.com/antonholmquist/jason +* https://github.com/mreiferson/go-ujson +* https://github.com/ugorji/go/codec +* https://github.com/pquerna/ffjson +* https://github.com/mailru/easyjson +* https://github.com/buger/jsonparser + +#### TLDR +If you want to skip next sections we have 2 winner: `jsonparser` and `easyjson`. +`jsonparser` is up to 10 times faster than standard `encoding/json` package (depending on payload size and usage), and almost infinitely (literally) better in memory consumption because it operates with data on byte level, and provide direct slice pointers. +`easyjson` wins in CPU in medium tests and frankly i'm impressed with this package: it is remarkable results considering that it is almost drop-in replacement for `encoding/json` (require some code generation). + +It's hard to fully compare `jsonparser` and `easyjson` (or `ffson`), they a true parsers and fully process record, unlike `jsonparser` which parse only keys you specified. + +If you searching for replacement of `encoding/json` while keeping structs, `easyjson` is an amazing choice. If you want to process dynamic JSON, have memory constrains, or more control over your data you should try `jsonparser`. + +`jsonparser` performance heavily depends on usage, and it works best when you do not need to process full record, only some keys. The more calls you need to make, the slower it will be, in contrast `easyjson` (or `ffjson`, `encoding/json`) parser record only 1 time, and then you can make as many calls as you want. + +With great power comes great responsibility! :) + + +#### Small payload + +Each test processes 190 bytes of http log as a JSON record. +It should read multiple fields. 
+https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_small_payload_test.go + +Library | time/op | bytes/op | allocs/op + ------ | ------- | -------- | ------- +encoding/json struct | 7879 | 880 | 18 +encoding/json interface{} | 8946 | 1521 | 38 +Jeffail/gabs | 10053 | 1649 | 46 +bitly/go-simplejson | 10128 | 2241 | 36 +antonholmquist/jason | 27152 | 7237 | 101 +github.com/ugorji/go/codec | 8806 | 2176 | 31 +mreiferson/go-ujson | **7008** | **1409** | 37 +a8m/djson | 3862 | 1249 | 30 +pquerna/ffjson | **3769** | **624** | **15** +mailru/easyjson | **2002** | **192** | **9** +buger/jsonparser | **1367** | **0** | **0** +buger/jsonparser (EachKey API) | **809** | **0** | **0** + +Winners are ffjson, easyjson and jsonparser, where jsonparser is up to 9.8x faster than encoding/json and 4.6x faster than ffjson, and slightly faster than easyjson. +If you look at memory allocation, jsonparser has no rivals, as it makes no data copy and operates with raw []byte structures and pointers to it. + +#### Medium payload + +Each test processes a 2.4kb JSON record (based on Clearbit API). +It should read multiple nested fields and 1 array. + +https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_medium_payload_test.go + +| Library | time/op | bytes/op | allocs/op | +| ------- | ------- | -------- | --------- | +| encoding/json struct | 57749 | 1336 | 29 | +| encoding/json interface{} | 79297 | 10627 | 215 | +| Jeffail/gabs | 83807 | 11202 | 235 | +| bitly/go-simplejson | 88187 | 17187 | 220 | +| antonholmquist/jason | 94099 | 19013 | 247 | +| github.com/ugorji/go/codec | 114719 | 6712 | 152 | +| mreiferson/go-ujson | **56972** | 11547 | 270 | +| a8m/djson | 28525 | 10196 | 198 | +| pquerna/ffjson | **20298** | **856** | **20** | +| mailru/easyjson | **10512** | **336** | **12** | +| buger/jsonparser | **15955** | **0** | **0** | +| buger/jsonparser (EachKey API) | **8916** | **0** | **0** | + +The difference between ffjson and jsonparser in CPU usage is smaller, while the memory consumption difference is growing. On the other hand `easyjson` shows remarkable performance for medium payload. + +`gabs`, `go-simplejson` and `jason` are based on encoding/json and map[string]interface{} and actually only helpers for unstructured JSON, their performance correlate with `encoding/json interface{}`, and they will skip next round. +`go-ujson` while have its own parser, shows same performance as `encoding/json`, also skips next round. Same situation with `ugorji/go/codec`, but it showed unexpectedly bad performance for complex payloads. + + +#### Large payload + +Each test processes a 24kb JSON record (based on Discourse API) +It should read 2 arrays, and for each item in array get a few fields. +Basically it means processing a full JSON file. + +https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_large_payload_test.go + +| Library | time/op | bytes/op | allocs/op | +| --- | --- | --- | --- | +| encoding/json struct | 748336 | 8272 | 307 | +| encoding/json interface{} | 1224271 | 215425 | 3395 | +| a8m/djson | 510082 | 213682 | 2845 | +| pquerna/ffjson | **312271** | **7792** | **298** | +| mailru/easyjson | **154186** | **6992** | **288** | +| buger/jsonparser | **85308** | **0** | **0** | + +`jsonparser` now is a winner, but do not forget that it is way more lightweight parser than `ffson` or `easyjson`, and they have to parser all the data, while `jsonparser` parse only what you need. 
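For context on how this vendored API is exercised by the PR: the distributor's `getValueUsingJSONParser` probes the line once per allowed key with `jsonparser.GetString`, which returns an error when the key is absent, so the first hit wins. A standalone sketch of the same pattern follows; `levelFromJSON` is an illustrative name, not part of the change.

```go
package main

import (
	"fmt"

	"github.com/buger/jsonparser"
)

// levelFromJSON probes a JSON log line for the first matching level key,
// the same pattern getValueUsingJSONParser uses in pkg/distributor.
func levelFromJSON(line []byte, keys ...string) (string, bool) {
	for _, k := range keys {
		// GetString only succeeds when the key exists and holds a string value.
		if v, err := jsonparser.GetString(line, k); err == nil {
			return v, true
		}
	}
	return "", false
}

func main() {
	line := []byte(`{"msg":"request failed","level":"error"}`)
	if lvl, ok := levelFromJSON(line, "level", "severity", "lvl"); ok {
		fmt.Println(lvl) // error
	}
}
```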
All `ffjson`, `easysjon` and `jsonparser` have their own parsing code, and does not depend on `encoding/json` or `interface{}`, thats one of the reasons why they are so fast. `easyjson` also use a bit of `unsafe` package to reduce memory consuption (in theory it can lead to some unexpected GC issue, but i did not tested enough) + +Also last benchmark did not included `EachKey` test, because in this particular case we need to read lot of Array values, and using `ArrayEach` is more efficient. + +## Questions and support + +All bug-reports and suggestions should go though Github Issues. + +## Contributing + +1. Fork it +2. Create your feature branch (git checkout -b my-new-feature) +3. Commit your changes (git commit -am 'Added some feature') +4. Push to the branch (git push origin my-new-feature) +5. Create new Pull Request + +## Development + +All my development happens using Docker, and repo include some Make tasks to simplify development. + +* `make build` - builds docker image, usually can be called only once +* `make test` - run tests +* `make fmt` - run go fmt +* `make bench` - run benchmarks (if you need to run only single benchmark modify `BENCHMARK` variable in make file) +* `make profile` - runs benchmark and generate 3 files- `cpu.out`, `mem.mprof` and `benchmark.test` binary, which can be used for `go tool pprof` +* `make bash` - enter container (i use it for running `go tool pprof` above) diff --git a/vendor/github.com/buger/jsonparser/bytes.go b/vendor/github.com/buger/jsonparser/bytes.go new file mode 100644 index 000000000000..0bb0ff39562c --- /dev/null +++ b/vendor/github.com/buger/jsonparser/bytes.go @@ -0,0 +1,47 @@ +package jsonparser + +import ( + bio "bytes" +) + +// minInt64 '-9223372036854775808' is the smallest representable number in int64 +const minInt64 = `9223372036854775808` + +// About 2x faster then strconv.ParseInt because it only supports base 10, which is enough for JSON +func parseInt(bytes []byte) (v int64, ok bool, overflow bool) { + if len(bytes) == 0 { + return 0, false, false + } + + var neg bool = false + if bytes[0] == '-' { + neg = true + bytes = bytes[1:] + } + + var b int64 = 0 + for _, c := range bytes { + if c >= '0' && c <= '9' { + b = (10 * v) + int64(c-'0') + } else { + return 0, false, false + } + if overflow = (b < v); overflow { + break + } + v = b + } + + if overflow { + if neg && bio.Equal(bytes, []byte(minInt64)) { + return b, true, false + } + return 0, false, true + } + + if neg { + return -v, true, false + } else { + return v, true, false + } +} diff --git a/vendor/github.com/buger/jsonparser/bytes_safe.go b/vendor/github.com/buger/jsonparser/bytes_safe.go new file mode 100644 index 000000000000..ff16a4a19552 --- /dev/null +++ b/vendor/github.com/buger/jsonparser/bytes_safe.go @@ -0,0 +1,25 @@ +// +build appengine appenginevm + +package jsonparser + +import ( + "strconv" +) + +// See fastbytes_unsafe.go for explanation on why *[]byte is used (signatures must be consistent with those in that file) + +func equalStr(b *[]byte, s string) bool { + return string(*b) == s +} + +func parseFloat(b *[]byte) (float64, error) { + return strconv.ParseFloat(string(*b), 64) +} + +func bytesToString(b *[]byte) string { + return string(*b) +} + +func StringToBytes(s string) []byte { + return []byte(s) +} diff --git a/vendor/github.com/buger/jsonparser/bytes_unsafe.go b/vendor/github.com/buger/jsonparser/bytes_unsafe.go new file mode 100644 index 000000000000..589fea87eb33 --- /dev/null +++ b/vendor/github.com/buger/jsonparser/bytes_unsafe.go @@ -0,0 
+1,44 @@ +// +build !appengine,!appenginevm + +package jsonparser + +import ( + "reflect" + "strconv" + "unsafe" + "runtime" +) + +// +// The reason for using *[]byte rather than []byte in parameters is an optimization. As of Go 1.6, +// the compiler cannot perfectly inline the function when using a non-pointer slice. That is, +// the non-pointer []byte parameter version is slower than if its function body is manually +// inlined, whereas the pointer []byte version is equally fast to the manually inlined +// version. Instruction count in assembly taken from "go tool compile" confirms this difference. +// +// TODO: Remove hack after Go 1.7 release +// +func equalStr(b *[]byte, s string) bool { + return *(*string)(unsafe.Pointer(b)) == s +} + +func parseFloat(b *[]byte) (float64, error) { + return strconv.ParseFloat(*(*string)(unsafe.Pointer(b)), 64) +} + +// A hack until issue golang/go#2632 is fixed. +// See: https://github.com/golang/go/issues/2632 +func bytesToString(b *[]byte) string { + return *(*string)(unsafe.Pointer(b)) +} + +func StringToBytes(s string) []byte { + b := make([]byte, 0, 0) + bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + sh := (*reflect.StringHeader)(unsafe.Pointer(&s)) + bh.Data = sh.Data + bh.Cap = sh.Len + bh.Len = sh.Len + runtime.KeepAlive(s) + return b +} diff --git a/vendor/github.com/buger/jsonparser/escape.go b/vendor/github.com/buger/jsonparser/escape.go new file mode 100644 index 000000000000..49669b94207c --- /dev/null +++ b/vendor/github.com/buger/jsonparser/escape.go @@ -0,0 +1,173 @@ +package jsonparser + +import ( + "bytes" + "unicode/utf8" +) + +// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7 + +const supplementalPlanesOffset = 0x10000 +const highSurrogateOffset = 0xD800 +const lowSurrogateOffset = 0xDC00 + +const basicMultilingualPlaneReservedOffset = 0xDFFF +const basicMultilingualPlaneOffset = 0xFFFF + +func combineUTF16Surrogates(high, low rune) rune { + return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset) +} + +const badHex = -1 + +func h2I(c byte) int { + switch { + case c >= '0' && c <= '9': + return int(c - '0') + case c >= 'A' && c <= 'F': + return int(c - 'A' + 10) + case c >= 'a' && c <= 'f': + return int(c - 'a' + 10) + } + return badHex +} + +// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and +// is not checked. +// In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together. +// This function only handles one; decodeUnicodeEscape handles this more complex case. +func decodeSingleUnicodeEscape(in []byte) (rune, bool) { + // We need at least 6 characters total + if len(in) < 6 { + return utf8.RuneError, false + } + + // Convert hex to decimal + h1, h2, h3, h4 := h2I(in[2]), h2I(in[3]), h2I(in[4]), h2I(in[5]) + if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex { + return utf8.RuneError, false + } + + // Compose the hex digits + return rune(h1<<12 + h2<<8 + h3<<4 + h4), true +} + +// isUTF16EncodedRune checks if a rune is in the range for non-BMP characters, +// which is used to describe UTF16 chars. 
+// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane +func isUTF16EncodedRune(r rune) bool { + return highSurrogateOffset <= r && r <= basicMultilingualPlaneReservedOffset +} + +func decodeUnicodeEscape(in []byte) (rune, int) { + if r, ok := decodeSingleUnicodeEscape(in); !ok { + // Invalid Unicode escape + return utf8.RuneError, -1 + } else if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) { + // Valid Unicode escape in Basic Multilingual Plane + return r, 6 + } else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain + // UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate" + return utf8.RuneError, -1 + } else if r2 < lowSurrogateOffset { + // Invalid UTF16 "low surrogate" + return utf8.RuneError, -1 + } else { + // Valid UTF16 surrogate pair + return combineUTF16Surrogates(r, r2), 12 + } +} + +// backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X] +var backslashCharEscapeTable = [...]byte{ + '"': '"', + '\\': '\\', + '/': '/', + 'b': '\b', + 'f': '\f', + 'n': '\n', + 'r': '\r', + 't': '\t', +} + +// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns +// how many characters were consumed from 'in' and emitted into 'out'. +// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error. +func unescapeToUTF8(in, out []byte) (inLen int, outLen int) { + if len(in) < 2 || in[0] != '\\' { + // Invalid escape due to insufficient characters for any escape or no initial backslash + return -1, -1 + } + + // https://tools.ietf.org/html/rfc7159#section-7 + switch e := in[1]; e { + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + // Valid basic 2-character escapes (use lookup table) + out[0] = backslashCharEscapeTable[e] + return 2, 1 + case 'u': + // Unicode escape + if r, inLen := decodeUnicodeEscape(in); inLen == -1 { + // Invalid Unicode escape + return -1, -1 + } else { + // Valid Unicode escape; re-encode as UTF8 + outLen := utf8.EncodeRune(out, r) + return inLen, outLen + } + } + + return -1, -1 +} + +// unescape unescapes the string contained in 'in' and returns it as a slice. +// If 'in' contains no escaped characters: +// Returns 'in'. +// Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)): +// 'out' is used to build the unescaped string and is returned with no extra allocation +// Else: +// A new slice is allocated and returned. 
+func Unescape(in, out []byte) ([]byte, error) { + firstBackslash := bytes.IndexByte(in, '\\') + if firstBackslash == -1 { + return in, nil + } + + // Get a buffer of sufficient size (allocate if needed) + if cap(out) < len(in) { + out = make([]byte, len(in)) + } else { + out = out[0:len(in)] + } + + // Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice) + copy(out, in[:firstBackslash]) + in = in[firstBackslash:] + buf := out[firstBackslash:] + + for len(in) > 0 { + // Unescape the next escaped character + inLen, bufLen := unescapeToUTF8(in, buf) + if inLen == -1 { + return nil, MalformedStringEscapeError + } + + in = in[inLen:] + buf = buf[bufLen:] + + // Copy everything up until the next backslash + nextBackslash := bytes.IndexByte(in, '\\') + if nextBackslash == -1 { + copy(buf, in) + buf = buf[len(in):] + break + } else { + copy(buf, in[:nextBackslash]) + buf = buf[nextBackslash:] + in = in[nextBackslash:] + } + } + + // Trim the out buffer to the amount that was actually emitted + return out[:len(out)-len(buf)], nil +} diff --git a/vendor/github.com/buger/jsonparser/fuzz.go b/vendor/github.com/buger/jsonparser/fuzz.go new file mode 100644 index 000000000000..854bd11b2cdc --- /dev/null +++ b/vendor/github.com/buger/jsonparser/fuzz.go @@ -0,0 +1,117 @@ +package jsonparser + +func FuzzParseString(data []byte) int { + r, err := ParseString(data) + if err != nil || r == "" { + return 0 + } + return 1 +} + +func FuzzEachKey(data []byte) int { + paths := [][]string{ + {"name"}, + {"order"}, + {"nested", "a"}, + {"nested", "b"}, + {"nested2", "a"}, + {"nested", "nested3", "b"}, + {"arr", "[1]", "b"}, + {"arrInt", "[3]"}, + {"arrInt", "[5]"}, + {"nested"}, + {"arr", "["}, + {"a\n", "b\n"}, + } + EachKey(data, func(idx int, value []byte, vt ValueType, err error) {}, paths...) 
+ return 1 +} + +func FuzzDelete(data []byte) int { + Delete(data, "test") + return 1 +} + +func FuzzSet(data []byte) int { + _, err := Set(data, []byte(`"new value"`), "test") + if err != nil { + return 0 + } + return 1 +} + +func FuzzObjectEach(data []byte) int { + _ = ObjectEach(data, func(key, value []byte, valueType ValueType, off int) error { + return nil + }) + return 1 +} + +func FuzzParseFloat(data []byte) int { + _, err := ParseFloat(data) + if err != nil { + return 0 + } + return 1 +} + +func FuzzParseInt(data []byte) int { + _, err := ParseInt(data) + if err != nil { + return 0 + } + return 1 +} + +func FuzzParseBool(data []byte) int { + _, err := ParseBoolean(data) + if err != nil { + return 0 + } + return 1 +} + +func FuzzTokenStart(data []byte) int { + _ = tokenStart(data) + return 1 +} + +func FuzzGetString(data []byte) int { + _, err := GetString(data, "test") + if err != nil { + return 0 + } + return 1 +} + +func FuzzGetFloat(data []byte) int { + _, err := GetFloat(data, "test") + if err != nil { + return 0 + } + return 1 +} + +func FuzzGetInt(data []byte) int { + _, err := GetInt(data, "test") + if err != nil { + return 0 + } + return 1 +} + +func FuzzGetBoolean(data []byte) int { + _, err := GetBoolean(data, "test") + if err != nil { + return 0 + } + return 1 +} + +func FuzzGetUnsafeString(data []byte) int { + _, err := GetUnsafeString(data, "test") + if err != nil { + return 0 + } + return 1 +} diff --git a/vendor/github.com/buger/jsonparser/oss-fuzz-build.sh b/vendor/github.com/buger/jsonparser/oss-fuzz-build.sh new file mode 100644 index 000000000000..c573b0e2d104 --- /dev/null +++ b/vendor/github.com/buger/jsonparser/oss-fuzz-build.sh @@ -0,0 +1,47 @@ +#!/bin/bash -eu + +git clone https://github.com/dvyukov/go-fuzz-corpus +zip corpus.zip go-fuzz-corpus/json/corpus/* + +cp corpus.zip $OUT/fuzzparsestring_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzParseString fuzzparsestring + +cp corpus.zip $OUT/fuzzeachkey_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzEachKey fuzzeachkey + +cp corpus.zip $OUT/fuzzdelete_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzDelete fuzzdelete + +cp corpus.zip $OUT/fuzzset_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzSet fuzzset + +cp corpus.zip $OUT/fuzzobjecteach_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzObjectEach fuzzobjecteach + +cp corpus.zip $OUT/fuzzparsefloat_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzParseFloat fuzzparsefloat + +cp corpus.zip $OUT/fuzzparseint_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzParseInt fuzzparseint + +cp corpus.zip $OUT/fuzzparsebool_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzParseBool fuzzparsebool + +cp corpus.zip $OUT/fuzztokenstart_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzTokenStart fuzztokenstart + +cp corpus.zip $OUT/fuzzgetstring_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzGetString fuzzgetstring + +cp corpus.zip $OUT/fuzzgetfloat_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzGetFloat fuzzgetfloat + +cp corpus.zip $OUT/fuzzgetint_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzGetInt fuzzgetint + +cp corpus.zip $OUT/fuzzgetboolean_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser FuzzGetBoolean fuzzgetboolean + +cp corpus.zip $OUT/fuzzgetunsafestring_seed_corpus.zip +compile_go_fuzzer github.com/buger/jsonparser 
FuzzGetUnsafeString fuzzgetunsafestring
diff --git a/vendor/github.com/buger/jsonparser/parser.go b/vendor/github.com/buger/jsonparser/parser.go
new file mode 100644
index 000000000000..14b80bc4838c
--- /dev/null
+++ b/vendor/github.com/buger/jsonparser/parser.go
@@ -0,0 +1,1283 @@
+package jsonparser
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"strconv"
+)
+
+// Errors
+var (
+	KeyPathNotFoundError       = errors.New("Key path not found")
+	UnknownValueTypeError      = errors.New("Unknown value type")
+	MalformedJsonError         = errors.New("Malformed JSON error")
+	MalformedStringError       = errors.New("Value is string, but can't find closing '\"' symbol")
+	MalformedArrayError        = errors.New("Value is array, but can't find closing ']' symbol")
+	MalformedObjectError       = errors.New("Value looks like object, but can't find closing '}' symbol")
+	MalformedValueError        = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol")
+	OverflowIntegerError       = errors.New("Value is number, but overflowed while parsing")
+	MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string")
+)
+
+// How much stack space to allocate for unescaping JSON strings; if a string longer
+// than this needs to be escaped, it will result in a heap allocation
+const unescapeStackBufSize = 64
+
+func tokenEnd(data []byte) int {
+	for i, c := range data {
+		switch c {
+		case ' ', '\n', '\r', '\t', ',', '}', ']':
+			return i
+		}
+	}
+
+	return len(data)
+}
+
+func findTokenStart(data []byte, token byte) int {
+	for i := len(data) - 1; i >= 0; i-- {
+		switch data[i] {
+		case token:
+			return i
+		case '[', '{':
+			return 0
+		}
+	}
+
+	return 0
+}
+
+func findKeyStart(data []byte, key string) (int, error) {
+	i := 0
+	ln := len(data)
+	if ln > 0 && (data[0] == '{' || data[0] == '[') {
+		i = 1
+	}
+	var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
+
+	if ku, err := Unescape(StringToBytes(key), stackbuf[:]); err == nil {
+		key = bytesToString(&ku)
+	}
+
+	for i < ln {
+		switch data[i] {
+		case '"':
+			i++
+			keyBegin := i
+
+			strEnd, keyEscaped := stringEnd(data[i:])
+			if strEnd == -1 {
+				break
+			}
+			i += strEnd
+			keyEnd := i - 1
+
+			valueOffset := nextToken(data[i:])
+			if valueOffset == -1 {
+				break
+			}
+
+			i += valueOffset
+
+			// if string is a key, and key level match
+			k := data[keyBegin:keyEnd]
+			// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
+			// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
+			if keyEscaped {
+				if ku, err := Unescape(k, stackbuf[:]); err != nil {
+					break
+				} else {
+					k = ku
+				}
+			}
+
+			if data[i] == ':' && len(key) == len(k) && bytesToString(&k) == key {
+				return keyBegin - 1, nil
+			}
+
+		case '[':
+			end := blockEnd(data[i:], data[i], ']')
+			if end != -1 {
+				i = i + end
+			}
+		case '{':
+			end := blockEnd(data[i:], data[i], '}')
+			if end != -1 {
+				i = i + end
+			}
+		}
+		i++
+	}
+
+	return -1, KeyPathNotFoundError
+}
+
+func tokenStart(data []byte) int {
+	for i := len(data) - 1; i >= 0; i-- {
+		switch data[i] {
+		case '\n', '\r', '\t', ',', '{', '[':
+			return i
+		}
+	}
+
+	return 0
+}
+
+// Find position of next character which is not whitespace
+func nextToken(data []byte) int {
+	for i, c := range data {
+		switch c {
+		case ' ', '\n', '\r', '\t':
+			continue
+		default:
+			return i
+		}
+	}
+
+	return -1
+}
+
+// Find position of last character which is not whitespace
+func lastToken(data []byte) int {
+	for i := len(data) - 1; i >= 0; i-- {
+		switch data[i] {
+		case ' ', '\n', '\r', '\t':
+			continue
+		default:
+			return i
+		}
+	}
+
+	return -1
+}
+
+// Tries to find the end of a string.
+// Supports strings containing escaped quote symbols.
+func stringEnd(data []byte) (int, bool) {
+	escaped := false
+	for i, c := range data {
+		if c == '"' {
+			if !escaped {
+				return i + 1, false
+			} else {
+				j := i - 1
+				for {
+					if j < 0 || data[j] != '\\' {
+						return i + 1, true // even number of backslashes
+					}
+					j--
+					if j < 0 || data[j] != '\\' {
+						break // odd number of backslashes
+					}
+					j--
+
+				}
+			}
+		} else if c == '\\' {
+			escaped = true
+		}
+	}
+
+	return -1, escaped
+}
+
+// Find end of the data structure, array or object.
+// For array openSym and closeSym will be '[' and ']', for object '{' and '}'
+func blockEnd(data []byte, openSym byte, closeSym byte) int {
+	level := 0
+	i := 0
+	ln := len(data)
+
+	for i < ln {
+		switch data[i] {
+		case '"': // If inside string, skip it
+			se, _ := stringEnd(data[i+1:])
+			if se == -1 {
+				return -1
+			}
+			i += se
+		case openSym: // If open symbol, increase level
+			level++
+		case closeSym: // If close symbol, decrease level
+			level--
+
+			// If we have returned to the original level, we're done
+			if level == 0 {
+				return i + 1
+			}
+		}
+		i++
+	}
+
+	return -1
+}
+
+func searchKeys(data []byte, keys ...string) int {
+	keyLevel := 0
+	level := 0
+	i := 0
+	ln := len(data)
+	lk := len(keys)
+	lastMatched := true
+
+	if lk == 0 {
+		return 0
+	}
+
+	var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
+
+	for i < ln {
+		switch data[i] {
+		case '"':
+			i++
+			keyBegin := i
+
+			strEnd, keyEscaped := stringEnd(data[i:])
+			if strEnd == -1 {
+				return -1
+			}
+			i += strEnd
+			keyEnd := i - 1
+
+			valueOffset := nextToken(data[i:])
+			if valueOffset == -1 {
+				return -1
+			}
+
+			i += valueOffset
+
+			// if string is a key
+			if data[i] == ':' {
+				if level < 1 {
+					return -1
+				}
+
+				key := data[keyBegin:keyEnd]
+
+				// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
+				// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
+				var keyUnesc []byte
+				if !keyEscaped {
+					keyUnesc = key
+				} else if ku, err := Unescape(key, stackbuf[:]); err != nil {
+					return -1
+				} else {
+					keyUnesc = ku
+				}
+
+				if level <= len(keys) {
+					if equalStr(&keyUnesc, keys[level-1]) {
+						lastMatched = true
+
+						// if key level match
+						if keyLevel == level-1 {
+							keyLevel++
+							// If we found all keys in path
+							if keyLevel == lk {
+								return i + 1
+							}
+						}
+					} else {
+						lastMatched = false
+					}
+				} else {
+					return -1
+				}
+			} else {
+				i--
+			}
+		case '{':
+
+			// only increase the level if the parent key matched; otherwise we can
+			// move directly to the end of this block
+			if !lastMatched {
+				end := blockEnd(data[i:], '{', '}')
+				if end == -1 {
+					return -1
+				}
+				i += end - 1
+			} else {
+				level++
+			}
+		case '}':
+			level--
+			if level == keyLevel {
+				keyLevel--
+			}
+		case '[':
+			// If we want to get array element by index
+			if keyLevel == level && keys[level][0] == '[' {
+				var keyLen = len(keys[level])
+				if keyLen < 3 || keys[level][0] != '[' || keys[level][keyLen-1] != ']' {
+					return -1
+				}
+				aIdx, err := strconv.Atoi(keys[level][1 : keyLen-1])
+				if err != nil {
+					return -1
+				}
+				var curIdx int
+				var valueFound []byte
+				var valueOffset int
+				var curI = i
+				ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
+					if curIdx == aIdx {
+						valueFound = value
+						valueOffset = offset
+						if dataType == String {
+							valueOffset = valueOffset - 2
+							valueFound = data[curI+valueOffset : curI+valueOffset+len(value)+2]
+						}
+					}
+					curIdx += 1
+				})
+
+				if valueFound == nil {
+					return -1
+				} else {
+					subIndex := searchKeys(valueFound, keys[level+1:]...)
+					if subIndex < 0 {
+						return -1
+					}
+					return i + valueOffset + subIndex
+				}
+			} else {
+				// Do not search for keys inside arrays
+				if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
+					return -1
+				} else {
+					i += arraySkip - 1
+				}
+			}
+		case ':': // If encountered, JSON data is malformed
+			return -1
+		}
+
+		i++
+	}
+
+	return -1
+}
+
+func sameTree(p1, p2 []string) bool {
+	minLen := len(p1)
+	if len(p2) < minLen {
+		minLen = len(p2)
+	}
+
+	for pi_1, p_1 := range p1[:minLen] {
+		if p2[pi_1] != p_1 {
+			return false
+		}
+	}
+
+	return true
+}
+
+func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int {
+	var x struct{}
+	pathFlags := make([]bool, len(paths))
+	var level, pathsMatched, i int
+	ln := len(data)
+
+	var maxPath int
+	for _, p := range paths {
+		if len(p) > maxPath {
+			maxPath = len(p)
+		}
+	}
+
+	pathsBuf := make([]string, maxPath)
+
+	for i < ln {
+		switch data[i] {
+		case '"':
+			i++
+			keyBegin := i
+
+			strEnd, keyEscaped := stringEnd(data[i:])
+			if strEnd == -1 {
+				return -1
+			}
+			i += strEnd
+
+			keyEnd := i - 1
+
+			valueOffset := nextToken(data[i:])
+			if valueOffset == -1 {
+				return -1
+			}
+
+			i += valueOffset
+
+			// if string is a key, and key level match
+			if data[i] == ':' {
+				match := -1
+				key := data[keyBegin:keyEnd]
+
+				// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
+				// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
+				var keyUnesc []byte
+				if !keyEscaped {
+					keyUnesc = key
+				} else {
+					var stackbuf [unescapeStackBufSize]byte
+					if ku, err := Unescape(key, stackbuf[:]); err != nil {
+						return -1
+					} else {
+						keyUnesc = ku
+					}
+				}
+
+				if maxPath >= level {
+					if level < 1 {
+						cb(-1, nil, Unknown, MalformedJsonError)
+						return -1
+					}
+
+					pathsBuf[level-1] = bytesToString(&keyUnesc)
+					for pi, p := range paths {
+						if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) {
+							continue
+						}
+
+						match = pi
+
+						pathsMatched++
+						pathFlags[pi] = true
+
+						v, dt, _, e := Get(data[i+1:])
+						cb(pi, v, dt, e)
+
+						if pathsMatched == len(paths) {
+							break
+						}
+					}
+					if pathsMatched == len(paths) {
+						return i
+					}
+				}
+
+				if match == -1 {
+					tokenOffset := nextToken(data[i+1:])
+					i += tokenOffset
+
+					if data[i] == '{' {
+						blockSkip := blockEnd(data[i:], '{', '}')
+						i += blockSkip + 1
+					}
+				}
+
+				if i < ln {
+					switch data[i] {
+					case '{', '}', '[', '"':
+						i--
+					}
+				}
+			} else {
+				i--
+			}
+		case '{':
+			level++
+		case '}':
+			level--
+		case '[':
+			var ok bool
+			arrIdxFlags := make(map[int]struct{})
+			pIdxFlags := make([]bool, len(paths))
+
+			if level < 0 {
+				cb(-1, nil, Unknown, MalformedJsonError)
+				return -1
+			}
+
+			for pi, p := range paths {
+				if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) {
+					continue
+				}
+				if len(p[level]) >= 2 {
+					aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1])
+					arrIdxFlags[aIdx] = x
+					pIdxFlags[pi] = true
+				}
+			}
+
+			if len(arrIdxFlags) > 0 {
+				level++
+
+				var curIdx int
+				arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
+					if _, ok = arrIdxFlags[curIdx]; ok {
+						for pi, p := range paths {
+							if pIdxFlags[pi] {
+								aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1])
+
+								if curIdx == aIdx {
+									of := searchKeys(value, p[level:]...)
+
+									pathsMatched++
+									pathFlags[pi] = true
+
+									if of != -1 {
+										v, dt, _, e := Get(value[of:])
+										cb(pi, v, dt, e)
+									}
+								}
+							}
+						}
+					}
+
+					curIdx += 1
+				})
+
+				if pathsMatched == len(paths) {
+					return i
+				}
+
+				i += arrOff - 1
+			} else {
+				// Do not search for keys inside arrays
+				if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
+					return -1
+				} else {
+					i += arraySkip - 1
+				}
+			}
+		case ']':
+			level--
+		}
+
+		i++
+	}
+
+	return -1
+}
+
+// Data types available in valid JSON data.
+type ValueType int
+
+const (
+	NotExist = ValueType(iota)
+	String
+	Number
+	Object
+	Array
+	Boolean
+	Null
+	Unknown
+)
+
+func (vt ValueType) String() string {
+	switch vt {
+	case NotExist:
+		return "non-existent"
+	case String:
+		return "string"
+	case Number:
+		return "number"
+	case Object:
+		return "object"
+	case Array:
+		return "array"
+	case Boolean:
+		return "boolean"
+	case Null:
+		return "null"
+	default:
+		return "unknown"
+	}
+}
+
+var (
+	trueLiteral  = []byte("true")
+	falseLiteral = []byte("false")
+	nullLiteral  = []byte("null")
+)
+
+func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte {
+	isIndex := string(keys[0][0]) == "["
+	offset := 0
+	lk := calcAllocateSpace(keys, setValue, comma, object)
+	buffer := make([]byte, lk, lk)
+	if comma {
+		offset += WriteToBuffer(buffer[offset:], ",")
+	}
+	if isIndex && !comma {
+		offset += WriteToBuffer(buffer[offset:], "[")
+	} else {
+		if object {
+			offset += WriteToBuffer(buffer[offset:], "{")
+		}
+		if !isIndex {
+			offset += WriteToBuffer(buffer[offset:], "\"")
+			offset += WriteToBuffer(buffer[offset:], keys[0])
+			offset += WriteToBuffer(buffer[offset:], "\":")
+		}
+	}
+
+	for i := 1; i < len(keys); i++ {
+		if string(keys[i][0]) == "[" {
+			offset += WriteToBuffer(buffer[offset:], "[")
+		} else {
+			offset += WriteToBuffer(buffer[offset:], "{\"")
+			offset += WriteToBuffer(buffer[offset:], keys[i])
+			offset += WriteToBuffer(buffer[offset:], "\":")
+		}
+	}
+	offset += WriteToBuffer(buffer[offset:], string(setValue))
+	for i := len(keys) - 1; i > 0; i-- {
+		if string(keys[i][0]) == "[" {
+			offset += WriteToBuffer(buffer[offset:], "]")
+		} else {
+			offset += WriteToBuffer(buffer[offset:], "}")
+		}
+	}
+	if isIndex && !comma {
+		offset += WriteToBuffer(buffer[offset:], "]")
+	}
+	if object && !isIndex {
+		offset += WriteToBuffer(buffer[offset:], "}")
+	}
+	return buffer
+}
+
+func calcAllocateSpace(keys []string, setValue []byte, comma, object bool) int {
+	isIndex := string(keys[0][0]) == "["
+	lk := 0
+	if comma {
+		// ,
+		lk += 1
+	}
+	if isIndex && !comma {
+		// []
+		lk += 2
+	} else {
+		if object {
+			// {
+			lk += 1
+		}
+		if !isIndex {
+			// "keys[0]"
+			lk += len(keys[0]) + 3
+		}
+	}
+
+
+	lk += len(setValue)
+	for i := 1; i < len(keys); i++ {
+		if string(keys[i][0]) == "[" {
+			// []
+			lk += 2
+		} else {
+			// {"keys[i]":setValue}
+			lk += len(keys[i]) + 5
+		}
+	}
+
+	if object && !isIndex {
+		// }
+		lk += 1
+	}
+
+	return lk
+}
+
+func WriteToBuffer(buffer []byte, str string) int {
+	copy(buffer, str)
+	return len(str)
+}
+
+/*
+
+Delete - Receives existing data structure and the key path to delete.
+
+Returns:
+`data` - the modified data
+
+*/
+func Delete(data []byte, keys ...string) []byte {
+	lk := len(keys)
+	if lk == 0 {
+		return data[:0]
+	}
+
+	array := false
+	if len(keys[lk-1]) > 0 && string(keys[lk-1][0]) == "[" {
+		array = true
+	}
+
+	var startOffset, keyOffset int
+	endOffset := len(data)
+	var err error
+	if !array {
+		if len(keys) > 1 {
+			_, _, startOffset, endOffset, err = internalGet(data, keys[:lk-1]...)
+			if err == KeyPathNotFoundError {
+				// problem parsing the data
+				return data
+			}
+		}
+
+		keyOffset, err = findKeyStart(data[startOffset:endOffset], keys[lk-1])
+		if err == KeyPathNotFoundError {
+			// problem parsing the data
+			return data
+		}
+		keyOffset += startOffset
+		_, _, _, subEndOffset, _ := internalGet(data[startOffset:endOffset], keys[lk-1])
+		endOffset = startOffset + subEndOffset
+		tokEnd := tokenEnd(data[endOffset:])
+		tokStart := findTokenStart(data[:keyOffset], ","[0])
+
+		if data[endOffset+tokEnd] == ","[0] {
+			endOffset += tokEnd + 1
+		} else if data[endOffset+tokEnd] == " "[0] && len(data) > endOffset+tokEnd+1 && data[endOffset+tokEnd+1] == ","[0] {
+			endOffset += tokEnd + 2
+		} else if data[endOffset+tokEnd] == "}"[0] && data[tokStart] == ","[0] {
+			keyOffset = tokStart
+		}
+	} else {
+		_, _, keyOffset, endOffset, err = internalGet(data, keys...)
+		if err == KeyPathNotFoundError {
+			// problem parsing the data
+			return data
+		}
+
+		tokEnd := tokenEnd(data[endOffset:])
+		tokStart := findTokenStart(data[:keyOffset], ","[0])
+
+		if data[endOffset+tokEnd] == ","[0] {
+			endOffset += tokEnd + 1
+		} else if data[endOffset+tokEnd] == "]"[0] && data[tokStart] == ","[0] {
+			keyOffset = tokStart
+		}
+	}
+
+	// We need to remove the remaining trailing comma if we delete the last element in the object
+	prevTok := lastToken(data[:keyOffset])
+	remainedValue := data[endOffset:]
+
+	var newOffset int
+	if nextToken(remainedValue) > -1 && remainedValue[nextToken(remainedValue)] == '}' && data[prevTok] == ',' {
+		newOffset = prevTok
+	} else {
+		newOffset = prevTok + 1
+	}
+
+	// We have to make a copy here if we don't want to mangle the original data, because byte slices are
+	// accessed by reference and not by value
+	dataCopy := make([]byte, len(data))
+	copy(dataCopy, data)
+	data = append(dataCopy[:newOffset], dataCopy[endOffset:]...)
+
+	return data
+}
+
+/*
+
+Set - Receives existing data structure, path to set, and data to set at that key.
+
+Returns:
+`value` - modified byte array
+`err` - On any parsing error
+
+*/
+func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) {
+	// ensure keys are set
+	if len(keys) == 0 {
+		return nil, KeyPathNotFoundError
+	}
+
+	_, _, startOffset, endOffset, err := internalGet(data, keys...)
+	if err != nil {
+		if err != KeyPathNotFoundError {
+			// problem parsing the data
+			return nil, err
+		}
+		// full path doesn't exist
+		// does any subpath exist?
+		var depth int
+		for i := range keys {
+			_, _, start, end, sErr := internalGet(data, keys[:i+1]...)
+			if sErr != nil {
+				break
+			} else {
+				endOffset = end
+				startOffset = start
+				depth++
+			}
+		}
+		comma := true
+		object := false
+		if endOffset == -1 {
+			firstToken := nextToken(data)
+			// We can't set a top-level key if data isn't an object
+			if firstToken < 0 || data[firstToken] != '{' {
+				return nil, KeyPathNotFoundError
+			}
+			// Don't need a comma if the input is an empty object
+			secondToken := firstToken + 1 + nextToken(data[firstToken+1:])
+			if data[secondToken] == '}' {
+				comma = false
+			}
+			// Set the top level key at the end (accounting for any trailing whitespace)
+			// This assumes last token is valid like '}', could check and return error
+			endOffset = lastToken(data)
+		}
+		depthOffset := endOffset
+		if depth != 0 {
+			// if subpath is a non-empty object, add to it
+			// or if subpath is a non-empty array, add to it
+			if (data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])] != '}') ||
+				(data[startOffset] == '[' && data[startOffset+1+nextToken(data[startOffset+1:])] == '{') && keys[depth:][0][0] == 91 {
+				depthOffset--
+				startOffset = depthOffset
+				// otherwise, over-write it with a new object
+			} else {
+				comma = false
+				object = true
+			}
+		} else {
+			startOffset = depthOffset
+		}
+		value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...)
+	} else {
+		// path currently exists
+		startComponent := data[:startOffset]
+		endComponent := data[endOffset:]
+
+		value = make([]byte, len(startComponent)+len(endComponent)+len(setValue))
+		newEndOffset := startOffset + len(setValue)
+		copy(value[0:startOffset], startComponent)
+		copy(value[startOffset:newEndOffset], setValue)
+		copy(value[newEndOffset:], endComponent)
+	}
+	return value, nil
+}
+
+func getType(data []byte, offset int) ([]byte, ValueType, int, error) {
+	var dataType ValueType
+	endOffset := offset
+
+	// if string value
+	if data[offset] == '"' {
+		dataType = String
+		if idx, _ := stringEnd(data[offset+1:]); idx != -1 {
+			endOffset += idx + 1
+		} else {
+			return nil, dataType, offset, MalformedStringError
+		}
+	} else if data[offset] == '[' { // if array value
+		dataType = Array
+		// break label, for stopping nested loops
+		endOffset = blockEnd(data[offset:], '[', ']')
+
+		if endOffset == -1 {
+			return nil, dataType, offset, MalformedArrayError
+		}
+
+		endOffset += offset
+	} else if data[offset] == '{' { // if object value
+		dataType = Object
+		// break label, for stopping nested loops
+		endOffset = blockEnd(data[offset:], '{', '}')
+
+		if endOffset == -1 {
+			return nil, dataType, offset, MalformedObjectError
+		}
+
+		endOffset += offset
+	} else {
+		// Number, Boolean or None
+		end := tokenEnd(data[endOffset:])
+
+		if end == -1 {
+			return nil, dataType, offset, MalformedValueError
+		}
+
+		value := data[offset : endOffset+end]
+
+		switch data[offset] {
+		case 't', 'f': // true or false
+			if bytes.Equal(value, trueLiteral) || bytes.Equal(value, falseLiteral) {
+				dataType = Boolean
+			} else {
+				return nil, Unknown, offset, UnknownValueTypeError
+			}
+		case 'u', 'n': // undefined or null
+			if bytes.Equal(value, nullLiteral) {
+				dataType = Null
+			} else {
+				return nil, Unknown, offset, UnknownValueTypeError
+			}
+		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
+			dataType = Number
+		default:
+			return nil, Unknown, offset, UnknownValueTypeError
+		}
+
+		endOffset += end
+	}
+	return data[offset:endOffset], dataType, endOffset, nil
+}
+
+/*
+Get - Receives data structure, and key path to extract value from.
+
+Returns:
+`value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error
+`dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null`
+`offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper.
+`err` - If the key is not found or there is any other parsing issue, it returns an error. If the key is not found it also sets `dataType` to `NotExist`
+
+Accepts multiple keys to specify the path to a JSON value (in case of querying nested structures).
+If no keys are provided it will try to extract the closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation.
+*/
+func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
+	a, b, _, d, e := internalGet(data, keys...)
+	return a, b, d, e
+}
+
+func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) {
+	if len(keys) > 0 {
+		if offset = searchKeys(data, keys...); offset == -1 {
+			return nil, NotExist, -1, -1, KeyPathNotFoundError
+		}
+	}
+
+	// Go to closest value
+	nO := nextToken(data[offset:])
+	if nO == -1 {
+		return nil, NotExist, offset, -1, MalformedJsonError
+	}
+
+	offset += nO
+	value, dataType, endOffset, err = getType(data, offset)
+	if err != nil {
+		return value, dataType, offset, endOffset, err
+	}
+
+	// Strip quotes from string values
+	if dataType == String {
+		value = value[1 : len(value)-1]
+	}
+
+	return value[:len(value):len(value)], dataType, offset, endOffset, nil
+}
+
+// ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`.
+func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int, err error), keys ...string) (offset int, err error) {
+	if len(data) == 0 {
+		return -1, MalformedObjectError
+	}
+
+	nT := nextToken(data)
+	if nT == -1 {
+		return -1, MalformedJsonError
+	}
+
+	offset = nT + 1
+
+	if len(keys) > 0 {
+		if offset = searchKeys(data, keys...); offset == -1 {
+			return offset, KeyPathNotFoundError
+		}
+
+		// Go to closest value
+		nO := nextToken(data[offset:])
+		if nO == -1 {
+			return offset, MalformedJsonError
+		}
+
+		offset += nO
+
+		if data[offset] != '[' {
+			return offset, MalformedArrayError
+		}
+
+		offset++
+	}
+
+	nO := nextToken(data[offset:])
+	if nO == -1 {
+		return offset, MalformedJsonError
+	}
+
+	offset += nO
+
+	if data[offset] == ']' {
+		return offset, nil
+	}
+
+	for true {
+		v, t, o, e := Get(data[offset:])
+
+		if e != nil {
+			return offset, e
+		}
+
+		if o == 0 {
+			break
+		}
+
+		if t != NotExist {
+			cb(v, t, offset+o-len(v), e)
+		}
+
+		if e != nil {
+			break
+		}
+
+		offset += o
+
+		skipToToken := nextToken(data[offset:])
+		if skipToToken == -1 {
+			return offset, MalformedArrayError
+		}
+		offset += skipToToken
+
+		if data[offset] == ']' {
+			break
+		}
+
+		if data[offset] != ',' {
+			return offset, MalformedArrayError
+		}
+
+		offset++
+	}
+
+	return offset, nil
+}
+
+// ObjectEach iterates over the key-value pairs of a JSON object, invoking a given callback for each such entry
+func ObjectEach(data []byte, callback func(key []byte, value []byte, dataType ValueType, offset int) error, keys ...string) (err error) {
+	offset := 0
+
+	// Descend to the desired key, if requested
+	if len(keys) > 0 {
+		if off := searchKeys(data, keys...); off == -1 {
+			return KeyPathNotFoundError
+		} else {
+			offset = off
+		}
+	}
+
+	// Validate and skip past opening brace
+	if off := nextToken(data[offset:]); off == -1 {
+		return MalformedObjectError
+	} else if offset += off; data[offset] != '{' {
+		return MalformedObjectError
+	} else {
+		offset++
+	}
+
+	// Skip to the first token inside the object, or stop if we find the ending brace
+	if off := nextToken(data[offset:]); off == -1 {
+		return MalformedJsonError
+	} else if offset += off; data[offset] == '}' {
+		return nil
+	}
+
+	// Loop pre-condition: data[offset] points to what should be either the next entry's key, or the closing brace (if it's anything else, the JSON is malformed)
+	for offset < len(data) {
+		// Step 1: find the next key
+		var key []byte
+
+		// Check what the next token is: start of string, end of object, or something else (error)
+		switch data[offset] {
+		case '"':
+			offset++ // accept as string and skip opening quote
+		case '}':
+			return nil // we found the end of the object; stop and return success
+		default:
+			return MalformedObjectError
+		}
+
+		// Find the end of the key string
+		var keyEscaped bool
+		if off, esc := stringEnd(data[offset:]); off == -1 {
+			return MalformedJsonError
+		} else {
+			key, keyEscaped = data[offset:offset+off-1], esc
+			offset += off
+		}
+
+		// Unescape the string if needed
+		if keyEscaped {
+			var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
+			if keyUnescaped, err := Unescape(key, stackbuf[:]); err != nil {
+				return MalformedStringEscapeError
+			} else {
+				key = keyUnescaped
+			}
+		}
+
+		// Step 2: skip the colon
+		if off := nextToken(data[offset:]); off == -1 {
+			return MalformedJsonError
+		} else if offset += off; data[offset] != ':' {
+			return MalformedJsonError
+		} else {
+			offset++
+		}
+
+		// Step 3: find the associated value, then invoke the callback
+		if value, valueType, off, err := Get(data[offset:]); err != nil {
+			return err
+		} else if err := callback(key, value, valueType, offset+off); err != nil { // Invoke the callback here!
+			return err
+		} else {
+			offset += off
+		}
+
+		// Step 4: skip over the next comma to the following token, or stop if we hit the ending brace
+		if off := nextToken(data[offset:]); off == -1 {
+			return MalformedArrayError
+		} else {
+			offset += off
+			switch data[offset] {
+			case '}':
+				return nil // Stop if we hit the close brace
+			case ',':
+				offset++ // Ignore the comma
+			default:
+				return MalformedObjectError
+			}
+		}
+
+		// Skip to the next token after the comma
+		if off := nextToken(data[offset:]); off == -1 {
+			return MalformedArrayError
+		} else {
+			offset += off
+		}
+	}
+
+	return MalformedObjectError // we shouldn't get here; it's expected that we will return via finding the ending brace
+}
+
+// GetUnsafeString returns the value retrieved by `Get`; it creates the string without memory allocation by mapping the slice memory directly to a string. It does not handle escape symbols.
+func GetUnsafeString(data []byte, keys ...string) (val string, err error) {
+	v, _, _, e := Get(data, keys...)
+
+	if e != nil {
+		return "", e
+	}
+
+	return bytesToString(&v), nil
+}
+
+// GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols
+// If the key's data type does not match, it will return an error.
+func GetString(data []byte, keys ...string) (val string, err error) {
+	v, t, _, e := Get(data, keys...)
+
+	if e != nil {
+		return "", e
+	}
+
+	if t != String {
+		return "", fmt.Errorf("Value is not a string: %s", string(v))
+	}
+
+	// If no escapes return raw content
+	if bytes.IndexByte(v, '\\') == -1 {
+		return string(v), nil
+	}
+
+	return ParseString(v)
+}
+
+// GetFloat returns the value retrieved by `Get`, cast to a float64 if possible.
+// The offset is the same as in `Get`.
+// If the key's data type does not match, it will return an error.
+func GetFloat(data []byte, keys ...string) (val float64, err error) {
+	v, t, _, e := Get(data, keys...)
+
+	if e != nil {
+		return 0, e
+	}
+
+	if t != Number {
+		return 0, fmt.Errorf("Value is not a number: %s", string(v))
+	}
+
+	return ParseFloat(v)
+}
+
+// GetInt returns the value retrieved by `Get`, cast to an int64 if possible.
+// If the key's data type does not match, it will return an error.
+func GetInt(data []byte, keys ...string) (val int64, err error) {
+	v, t, _, e := Get(data, keys...)
+
+	if e != nil {
+		return 0, e
+	}
+
+	if t != Number {
+		return 0, fmt.Errorf("Value is not a number: %s", string(v))
+	}
+
+	return ParseInt(v)
+}
+
+// GetBoolean returns the value retrieved by `Get`, cast to a bool if possible.
+// The offset is the same as in `Get`.
+// If the key's data type does not match, it will return an error.
+func GetBoolean(data []byte, keys ...string) (val bool, err error) {
+	v, t, _, e := Get(data, keys...)
+
+	if e != nil {
+		return false, e
+	}
+
+	if t != Boolean {
+		return false, fmt.Errorf("Value is not a boolean: %s", string(v))
+	}
+
+	return ParseBoolean(v)
+}
+
+// ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness)
+func ParseBoolean(b []byte) (bool, error) {
+	switch {
+	case bytes.Equal(b, trueLiteral):
+		return true, nil
+	case bytes.Equal(b, falseLiteral):
+		return false, nil
+	default:
+		return false, MalformedValueError
+	}
+}
+
+// ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string)
+func ParseString(b []byte) (string, error) {
+	var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
+	if bU, err := Unescape(b, stackbuf[:]); err != nil {
+		return "", MalformedValueError
+	} else {
+		return string(bU), nil
+	}
+}
+
+// ParseFloat parses a Number ValueType into a Go float64
+func ParseFloat(b []byte) (float64, error) {
+	if v, err := parseFloat(&b); err != nil {
+		return 0, MalformedValueError
+	} else {
+		return v, nil
+	}
+}
+
+// ParseInt parses a Number ValueType into a Go int64
+func ParseInt(b []byte) (int64, error) {
+	if v, ok, overflow := parseInt(b); !ok {
+		if overflow {
+			return 0, OverflowIntegerError
+		}
+		return 0, MalformedValueError
+	} else {
+		return v, nil
+	}
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 753823cc94f4..f0152af75d4c 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -470,6 +470,9 @@ github.com/beorn7/perks/quantile
 # github.com/bmatcuk/doublestar v1.3.4
 ## explicit; go 1.12
 github.com/bmatcuk/doublestar
+# github.com/buger/jsonparser v1.1.1
+## explicit; go 1.13
+github.com/buger/jsonparser
 # github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b
 ## explicit
 github.com/c2h5oh/datasize
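
For reference, a minimal, self-contained sketch of how the read API vendored above is typically called. The JSON line and all variable names here are illustrative only; this is not the distributor's actual level-detection logic, which lives in pkg/distributor/distributor.go. Only functions whose implementations appear in the vendored file above are used (Get, GetString, ObjectEach):

	package main

	import (
		"fmt"

		"github.com/buger/jsonparser"
	)

	func main() {
		// Hypothetical log line used for illustration.
		line := []byte(`{"msg":"query failed","level":"error","attempts":3}`)

		// GetString descends the key path, verifies the value is a string,
		// and unescapes it into a Go string.
		if lvl, err := jsonparser.GetString(line, "level"); err == nil {
			fmt.Println(lvl) // error
		}

		// Get returns the raw value bytes plus the detected ValueType,
		// without allocating intermediate maps or structs.
		v, vt, _, err := jsonparser.Get(line, "attempts")
		fmt.Println(string(v), vt, err) // 3 number <nil>

		// ObjectEach visits every key/value pair of the object in order,
		// again without building a map.
		_ = jsonparser.ObjectEach(line, func(key, value []byte, dataType jsonparser.ValueType, _ int) error {
			fmt.Printf("%s=%s (%s)\n", key, value, dataType)
			return nil
		})
	}

This byte-slice-oriented API is why the package suits hot ingestion paths: a caller can probe a handful of candidate keys (such as the level/severity/lvl variants listed in the limits documentation) without fully deserializing each log line.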