From 133f42ce2d99d6adb6b5885af6d20939dc4b6fe9 Mon Sep 17 00:00:00 2001 From: tidwall Date: Tue, 1 Oct 2024 20:45:20 -0700 Subject: [PATCH] Generally faster parsing This commit includes an optimization that increases overall performance. The gains are roughly between 20% and 300% depending on the size of the JSON document. Larger documents will see the greatest gains, particularly when searching for keys that are deeply embedded, or near the end of the document. --- README.md | 4 +- gjson.go | 160 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 130 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index fc5dea5..387766d 100644 --- a/README.md +++ b/README.md @@ -438,7 +438,7 @@ Benchmarks of GJSON alongside [encoding/json](https://golang.org/pkg/encoding/js and [json-iterator](https://github.com/json-iterator/go) ``` -BenchmarkGJSONGet-10 14919366 240.9 ns/op 0 B/op 0 allocs/op +BenchmarkGJSONGet-10 17893731 202.1 ns/op 0 B/op 0 allocs/op BenchmarkGJSONUnmarshalMap-10 1663548 2157 ns/op 1920 B/op 26 allocs/op BenchmarkJSONUnmarshalMap-10 832236 4279 ns/op 2920 B/op 68 allocs/op BenchmarkJSONUnmarshalStruct-10 1076475 3219 ns/op 920 B/op 12 allocs/op @@ -489,6 +489,4 @@ widget.text.onMouseUp ** -Last run: Oct 1, 2024 - *These benchmarks were run on a MacBook Pro M1 Max using Go 1.22 and can be found [here](https://github.com/tidwall/gjson-benchmarks).* diff --git a/gjson.go b/gjson.go index 779fe61..5aa2a4f 100644 --- a/gjson.go +++ b/gjson.go @@ -1040,6 +1040,10 @@ func parseObjectPath(path string) (r objectPathResult) { return } +var vchars = [256]byte{ + '"': 2, '{': 3, '(': 3, '[': 3, '}': 1, ')': 1, ']': 1, +} + func parseSquash(json string, i int) (int, string) { // expects that the lead character is a '[' or '{' or '(' // squash the value, ignoring all nested arrays and objects. 
@@ -1047,43 +1051,137 @@ func parseSquash(json string, i int) (int, string) { s := i i++ depth := 1 - for ; i < len(json); i++ { - if json[i] >= '"' && json[i] <= '}' { - switch json[i] { - case '"': + var c byte + for i < len(json) { + for i < len(json)-8 { + jslice := json[i : i+8] + c = vchars[jslice[0]] + if c != 0 { + i += 0 + goto token + } + c = vchars[jslice[1]] + if c != 0 { + i += 1 + goto token + } + c = vchars[jslice[2]] + if c != 0 { + i += 2 + goto token + } + c = vchars[jslice[3]] + if c != 0 { + i += 3 + goto token + } + c = vchars[jslice[4]] + if c != 0 { + i += 4 + goto token + } + c = vchars[jslice[5]] + if c != 0 { + i += 5 + goto token + } + c = vchars[jslice[6]] + if c != 0 { + i += 6 + goto token + } + c = vchars[jslice[7]] + if c != 0 { + i += 7 + goto token + } + i += 8 + } + c = vchars[json[i]] + if c == 0 { + i++ + continue + } + token: + if c == 2 { + // '"' string + i++ + s2 := i + nextquote: + for i < len(json)-8 { + jslice := json[i : i+8] + if jslice[0] == '"' { + i += 0 + goto strchkesc + } + if jslice[1] == '"' { + i += 1 + goto strchkesc + } + if jslice[2] == '"' { + i += 2 + goto strchkesc + } + if jslice[3] == '"' { + i += 3 + goto strchkesc + } + if jslice[4] == '"' { + i += 4 + goto strchkesc + } + if jslice[5] == '"' { + i += 5 + goto strchkesc + } + if jslice[6] == '"' { + i += 6 + goto strchkesc + } + if jslice[7] == '"' { + i += 7 + goto strchkesc + } + i += 8 + } + goto strchkstd + strchkesc: + if json[i-1] != '\\' { i++ - s2 := i - for ; i < len(json); i++ { - if json[i] > '\\' { - continue - } - if json[i] == '"' { - // look for an escaped slash - if json[i-1] == '\\' { - n := 0 - for j := i - 2; j > s2-1; j-- { - if json[j] != '\\' { - break - } - n++ - } - if n%2 == 0 { - continue - } + continue + } + strchkstd: + for i < len(json) { + if json[i] > '\\' || json[i] != '"' { + i++ + continue + } + // look for an escaped slash + if json[i-1] == '\\' { + n := 0 + for j := i - 2; j > s2-1; j-- { + if json[j] != '\\' { + 
break } - break + n++ + } + if n%2 == 0 { + i++ + goto nextquote } } - case '{', '[', '(': - depth++ - case '}', ']', ')': - depth-- - if depth == 0 { - i++ - return i, json[s:i] - } + break + } + } else { + // '{', '[', '(', '}', ']', ')' + // open close tokens + depth += int(c) - 2 + if depth == 0 { + i++ + return i, json[s:i] } } + i++ } return i, json[s:] }