From 15c84037fadc337f2949710e9ea35fd7def6961c Mon Sep 17 00:00:00 2001 From: ee7 <45465154+ee7@users.noreply.github.com> Date: Thu, 10 Jun 2021 14:52:37 +0200 Subject: [PATCH] lint, sync: make JSON parsing stricter (#346) The Nim stdlib JSON modules parse JSON in a lenient way: they silently ignore: - a comma after the last item in an array or object - a line comment with `//` - a potentially-multi-line comment with `/* */` This is useful for parsing real-world JSON, but is bad for our use case of validating JSON. JSON that contains a trailing comma or such comments is technically invalid [1], and less portable. Most JSON parsers produce an error for a trailing comma, including the Ruby library that Exercism uses. Therefore `configlet lint` should produce an error for a trailing comma, so that a track doesn't merge a PR that contains one. This commit adds our own copies of the Nim JSON modules to our repo, and patches them to raise an exception for these cases. Each module was copied from the latest version on Nim's `version-1-4` branch. In the future, we will pull in upstream changes to these files when we update the Nim version. This approach is better than calling something like `jq` because it: - doesn't require/prefer the user to have an extra program installed - doesn't special-case running in CI - doesn't require refactoring our codebase - is much faster We use the somewhat-obscure `nimscript.patchFile` [2] mechanism to override the location of the `json` and `parsejson` modules, so that we can keep `std/json` in our import statements. Instead of the `patchFile` approach, it would also be possible to apply some transformation to the AST of the relevant procs at compile-time, but that's less readable and harder to maintain. Our approach might be obsoleted in the future - for example, if we ever move to some other JSON library, or if the upstream `parseJson` gains e.g. an `allowTrailingCommas = true` argument. [1] See e.g. https://www.json.org/json-en.html [2] https://nim-lang.org/docs/nimscript.html#patchFile%2Cstring%2Cstring%2Cstring Closes: #345 See also: #312 --- config.nims | 4 + src/patched_stdlib/json.nim | 1352 ++++++++++++++++++++++++++++++ src/patched_stdlib/parsejson.nim | 507 +++++++++++ tests/all_tests.nim | 1 + tests/test_json.nim | 129 +++ 5 files changed, 1993 insertions(+) create mode 100644 src/patched_stdlib/json.nim create mode 100644 src/patched_stdlib/parsejson.nim create mode 100644 tests/test_json.nim diff --git a/config.nims b/config.nims index 11ae36df..59b79788 100644 --- a/config.nims +++ b/config.nims @@ -2,6 +2,10 @@ switch("styleCheck", "hint") hint("Name", on) switch("experimental", "strictFuncs") +# Replace the stdlib JSON modules with our own stricter versions. +patchFile("stdlib", "json", "src/patched_stdlib/json") +patchFile("stdlib", "parsejson", "src/patched_stdlib/parsejson") + if defined(release): switch("opt", "size") switch("passC", "-flto") diff --git a/src/patched_stdlib/json.nim b/src/patched_stdlib/json.nim new file mode 100644 index 00000000..14577270 --- /dev/null +++ b/src/patched_stdlib/json.nim @@ -0,0 +1,1352 @@ +# This file is minimally adapted from this version in the Nim standard library: +# https://github.com/nim-lang/Nim/blob/82561f31eb8e/lib/pure/json.nim +# The standard library version is lenient: it silently allows a trailing comma. +# The below version is stricter: it raises a `JsonParsingError` for a trailing +# comma. + +# Copyright (c) 2015 Andreas Rumpf, Dominik Picheta + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +## This module implements a simple high performance `JSON`:idx: +## parser. JSON (JavaScript Object Notation) is a lightweight +## data-interchange format that is easy for humans to read and write +## (unlike XML). It is easy for machines to parse and generate. +## JSON is based on a subset of the JavaScript Programming Language, +## Standard ECMA-262 3rd Edition - December 1999. +## +## Overview +## ======== +## +## Parsing JSON +## ------------ +## +## JSON often arrives into your program (via an API or a file) as a ``string``. +## The first step is to change it from its serialized form into a nested object +## structure called a ``JsonNode``. +## +## The ``parseJson`` procedure takes a string containing JSON and returns a +## ``JsonNode`` object. This is an object variant and it is either a +## ``JObject``, ``JArray``, ``JString``, ``JInt``, ``JFloat``, ``JBool`` or +## ``JNull``. You check the kind of this object variant by using the ``kind`` +## accessor. +## +## For a ``JsonNode`` who's kind is ``JObject``, you can access its fields using +## the ``[]`` operator. The following example shows how to do this: +## +## .. code-block:: Nim +## import json +## +## let jsonNode = parseJson("""{"key": 3.14}""") +## +## doAssert jsonNode.kind == JObject +## doAssert jsonNode["key"].kind == JFloat +## +## Reading values +## -------------- +## +## Once you have a ``JsonNode``, retrieving the values can then be achieved +## by using one of the helper procedures, which include: +## +## * ``getInt`` +## * ``getFloat`` +## * ``getStr`` +## * ``getBool`` +## +## To retrieve the value of ``"key"`` you can do the following: +## +## .. code-block:: Nim +## import json +## +## let jsonNode = parseJson("""{"key": 3.14}""") +## +## doAssert jsonNode["key"].getFloat() == 3.14 +## +## **Important:** The ``[]`` operator will raise an exception when the +## specified field does not exist. +## +## Handling optional keys +## ---------------------- +## +## By using the ``{}`` operator instead of ``[]``, it will return ``nil`` +## when the field is not found. The ``get``-family of procedures will return a +## type's default value when called on ``nil``. +## +## .. code-block:: Nim +## import json +## +## let jsonNode = parseJson("{}") +## +## doAssert jsonNode{"nope"}.getInt() == 0 +## doAssert jsonNode{"nope"}.getFloat() == 0 +## doAssert jsonNode{"nope"}.getStr() == "" +## doAssert jsonNode{"nope"}.getBool() == false +## +## Using default values +## -------------------- +## +## The ``get``-family helpers also accept an additional parameter which allow +## you to fallback to a default value should the key's values be ``null``: +## +## .. code-block:: Nim +## import json +## +## let jsonNode = parseJson("""{"key": 3.14, "key2": null}""") +## +## doAssert jsonNode["key"].getFloat(6.28) == 3.14 +## doAssert jsonNode["key2"].getFloat(3.14) == 3.14 +## doAssert jsonNode{"nope"}.getFloat(3.14) == 3.14 # note the {} +## +## Unmarshalling +## ------------- +## +## In addition to reading dynamic data, Nim can also unmarshal JSON directly +## into a type with the ``to`` macro. +## +## Note: Use `Option `_ for keys sometimes missing in json +## responses, and backticks around keys with a reserved keyword as name. +## +## .. code-block:: Nim +## import json +## import options +## +## type +## User = object +## name: string +## age: int +## `type`: Option[string] +## +## let userJson = parseJson("""{ "name": "Nim", "age": 12 }""") +## let user = to(userJson, User) +## if user.`type`.isSome(): +## assert user.`type`.get() != "robot" +## +## Creating JSON +## ============= +## +## This module can also be used to comfortably create JSON using the ``%*`` +## operator: +## +## .. code-block:: nim +## import json +## +## var hisName = "John" +## let herAge = 31 +## var j = %* +## [ +## { "name": hisName, "age": 30 }, +## { "name": "Susan", "age": herAge } +## ] +## +## var j2 = %* {"name": "Isaac", "books": ["Robot Dreams"]} +## j2["details"] = %* {"age":35, "pi":3.1415} +## echo j2 +## +## See also: std/jsonutils for hookable json serialization/deserialization +## of arbitrary types. + +runnableExamples: + ## Note: for JObject, key ordering is preserved, unlike in some languages, + ## this is convenient for some use cases. Example: + type Foo = object + a1, a2, a0, a3, a4: int + doAssert $(%* Foo()) == """{"a1":0,"a2":0,"a0":0,"a3":0,"a4":0}""" + +import + hashes, tables, strutils, lexbase, streams, macros, parsejson + +import options # xxx remove this dependency using same approach as https://github.com/nim-lang/Nim/pull/14563 + +export + tables.`$` + +export + parsejson.JsonEventKind, parsejson.JsonError, JsonParser, JsonKindError, + open, close, str, getInt, getFloat, kind, getColumn, getLine, getFilename, + errorMsg, errorMsgExpected, next, JsonParsingError, raiseParseErr, nimIdentNormalize + +type + JsonNodeKind* = enum ## possible JSON node types + JNull, + JBool, + JInt, + JFloat, + JString, + JObject, + JArray + + JsonNode* = ref JsonNodeObj ## JSON node + JsonNodeObj* {.acyclic.} = object + isUnquoted: bool # the JString was a number-like token and + # so shouldn't be quoted + case kind*: JsonNodeKind + of JString: + str*: string + of JInt: + num*: BiggestInt + of JFloat: + fnum*: float + of JBool: + bval*: bool + of JNull: + nil + of JObject: + fields*: OrderedTable[string, JsonNode] + of JArray: + elems*: seq[JsonNode] + +proc newJString*(s: string): JsonNode = + ## Creates a new `JString JsonNode`. + result = JsonNode(kind: JString, str: s) + +proc newJRawNumber(s: string): JsonNode = + ## Creates a "raw JS number", that is a number that does not + ## fit into Nim's ``BiggestInt`` field. This is really a `JString` + ## with the additional information that it should be converted back + ## to the string representation without the quotes. + result = JsonNode(kind: JString, str: s, isUnquoted: true) + +proc newJStringMove(s: string): JsonNode = + result = JsonNode(kind: JString) + shallowCopy(result.str, s) + +proc newJInt*(n: BiggestInt): JsonNode = + ## Creates a new `JInt JsonNode`. + result = JsonNode(kind: JInt, num: n) + +proc newJFloat*(n: float): JsonNode = + ## Creates a new `JFloat JsonNode`. + result = JsonNode(kind: JFloat, fnum: n) + +proc newJBool*(b: bool): JsonNode = + ## Creates a new `JBool JsonNode`. + result = JsonNode(kind: JBool, bval: b) + +proc newJNull*(): JsonNode = + ## Creates a new `JNull JsonNode`. + result = JsonNode(kind: JNull) + +proc newJObject*(): JsonNode = + ## Creates a new `JObject JsonNode` + result = JsonNode(kind: JObject, fields: initOrderedTable[string, JsonNode](2)) + +proc newJArray*(): JsonNode = + ## Creates a new `JArray JsonNode` + result = JsonNode(kind: JArray, elems: @[]) + +proc getStr*(n: JsonNode, default: string = ""): string = + ## Retrieves the string value of a `JString JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JString``, or if ``n`` is nil. + if n.isNil or n.kind != JString: return default + else: return n.str + +proc getInt*(n: JsonNode, default: int = 0): int = + ## Retrieves the int value of a `JInt JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JInt``, or if ``n`` is nil. + if n.isNil or n.kind != JInt: return default + else: return int(n.num) + +proc getBiggestInt*(n: JsonNode, default: BiggestInt = 0): BiggestInt = + ## Retrieves the BiggestInt value of a `JInt JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JInt``, or if ``n`` is nil. + if n.isNil or n.kind != JInt: return default + else: return n.num + +proc getFloat*(n: JsonNode, default: float = 0.0): float = + ## Retrieves the float value of a `JFloat JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JFloat`` or ``JInt``, or if ``n`` is nil. + if n.isNil: return default + case n.kind + of JFloat: return n.fnum + of JInt: return float(n.num) + else: return default + +proc getBool*(n: JsonNode, default: bool = false): bool = + ## Retrieves the bool value of a `JBool JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JBool``, or if ``n`` is nil. + if n.isNil or n.kind != JBool: return default + else: return n.bval + +proc getFields*(n: JsonNode, + default = initOrderedTable[string, JsonNode](2)): + OrderedTable[string, JsonNode] = + ## Retrieves the key, value pairs of a `JObject JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JObject``, or if ``n`` is nil. + if n.isNil or n.kind != JObject: return default + else: return n.fields + +proc getElems*(n: JsonNode, default: seq[JsonNode] = @[]): seq[JsonNode] = + ## Retrieves the array of a `JArray JsonNode`. + ## + ## Returns ``default`` if ``n`` is not a ``JArray``, or if ``n`` is nil. + if n.isNil or n.kind != JArray: return default + else: return n.elems + +proc add*(father, child: JsonNode) = + ## Adds `child` to a JArray node `father`. + assert father.kind == JArray + father.elems.add(child) + +proc add*(obj: JsonNode, key: string, val: JsonNode) = + ## Sets a field from a `JObject`. + assert obj.kind == JObject + obj.fields[key] = val + +proc `%`*(s: string): JsonNode = + ## Generic constructor for JSON data. Creates a new `JString JsonNode`. + result = JsonNode(kind: JString, str: s) + +proc `%`*(n: uint): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + if n > cast[uint](int.high): + result = newJRawNumber($n) + else: + result = JsonNode(kind: JInt, num: BiggestInt(n)) + +proc `%`*(n: int): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + result = JsonNode(kind: JInt, num: n) + +proc `%`*(n: BiggestUInt): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + if n > cast[BiggestUInt](BiggestInt.high): + result = newJRawNumber($n) + else: + result = JsonNode(kind: JInt, num: BiggestInt(n)) + +proc `%`*(n: BiggestInt): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + result = JsonNode(kind: JInt, num: n) + +proc `%`*(n: float): JsonNode = + ## Generic constructor for JSON data. Creates a new `JFloat JsonNode`. + result = JsonNode(kind: JFloat, fnum: n) + +proc `%`*(b: bool): JsonNode = + ## Generic constructor for JSON data. Creates a new `JBool JsonNode`. + result = JsonNode(kind: JBool, bval: b) + +proc `%`*(keyVals: openArray[tuple[key: string, val: JsonNode]]): JsonNode = + ## Generic constructor for JSON data. Creates a new `JObject JsonNode` + if keyVals.len == 0: return newJArray() + result = newJObject() + for key, val in items(keyVals): result.fields[key] = val + +template `%`*(j: JsonNode): JsonNode = j + +proc `%`*[T](elements: openArray[T]): JsonNode = + ## Generic constructor for JSON data. Creates a new `JArray JsonNode` + result = newJArray() + for elem in elements: result.add(%elem) + +proc `%`*[T](table: Table[string, T]|OrderedTable[string, T]): JsonNode = + ## Generic constructor for JSON data. Creates a new ``JObject JsonNode``. + result = newJObject() + for k, v in table: result[k] = %v + +proc `%`*[T](opt: Option[T]): JsonNode = + ## Generic constructor for JSON data. Creates a new ``JNull JsonNode`` + ## if ``opt`` is empty, otherwise it delegates to the underlying value. + if opt.isSome: %opt.get else: newJNull() + +when false: + # For 'consistency' we could do this, but that only pushes people further + # into that evil comfort zone where they can use Nim without understanding it + # causing problems later on. + proc `%`*(elements: set[bool]): JsonNode = + ## Generic constructor for JSON data. Creates a new `JObject JsonNode`. + ## This can only be used with the empty set ``{}`` and is supported + ## to prevent the gotcha ``%*{}`` which used to produce an empty + ## JSON array. + result = newJObject() + assert false notin elements, "usage error: only empty sets allowed" + assert true notin elements, "usage error: only empty sets allowed" + +proc `[]=`*(obj: JsonNode, key: string, val: JsonNode) {.inline.} = + ## Sets a field from a `JObject`. + assert(obj.kind == JObject) + obj.fields[key] = val + +proc `%`*[T: object](o: T): JsonNode = + ## Construct JsonNode from tuples and objects. + result = newJObject() + for k, v in o.fieldPairs: result[k] = %v + +proc `%`*(o: ref object): JsonNode = + ## Generic constructor for JSON data. Creates a new `JObject JsonNode` + if o.isNil: + result = newJNull() + else: + result = %(o[]) + +proc `%`*(o: enum): JsonNode = + ## Construct a JsonNode that represents the specified enum value as a + ## string. Creates a new ``JString JsonNode``. + result = %($o) + +proc toJsonImpl(x: NimNode): NimNode {.compileTime.} = + case x.kind + of nnkBracket: # array + if x.len == 0: return newCall(bindSym"newJArray") + result = newNimNode(nnkBracket) + for i in 0 ..< x.len: + result.add(toJsonImpl(x[i])) + result = newCall(bindSym("%", brOpen), result) + of nnkTableConstr: # object + if x.len == 0: return newCall(bindSym"newJObject") + result = newNimNode(nnkTableConstr) + for i in 0 ..< x.len: + x[i].expectKind nnkExprColonExpr + result.add newTree(nnkExprColonExpr, x[i][0], toJsonImpl(x[i][1])) + result = newCall(bindSym("%", brOpen), result) + of nnkCurly: # empty object + x.expectLen(0) + result = newCall(bindSym"newJObject") + of nnkNilLit: + result = newCall(bindSym"newJNull") + of nnkPar: + if x.len == 1: result = toJsonImpl(x[0]) + else: result = newCall(bindSym("%", brOpen), x) + else: + result = newCall(bindSym("%", brOpen), x) + +macro `%*`*(x: untyped): untyped = + ## Convert an expression to a JsonNode directly, without having to specify + ## `%` for every element. + result = toJsonImpl(x) + +proc `==`*(a, b: JsonNode): bool = + ## Check two nodes for equality + if a.isNil: + if b.isNil: return true + return false + elif b.isNil or a.kind != b.kind: + return false + else: + case a.kind + of JString: + result = a.str == b.str + of JInt: + result = a.num == b.num + of JFloat: + result = a.fnum == b.fnum + of JBool: + result = a.bval == b.bval + of JNull: + result = true + of JArray: + result = a.elems == b.elems + of JObject: + # we cannot use OrderedTable's equality here as + # the order does not matter for equality here. + if a.fields.len != b.fields.len: return false + for key, val in a.fields: + if not b.fields.hasKey(key): return false + if b.fields[key] != val: return false + result = true + +proc hash*(n: OrderedTable[string, JsonNode]): Hash {.noSideEffect.} + +proc hash*(n: JsonNode): Hash = + ## Compute the hash for a JSON node + case n.kind + of JArray: + result = hash(n.elems) + of JObject: + result = hash(n.fields) + of JInt: + result = hash(n.num) + of JFloat: + result = hash(n.fnum) + of JBool: + result = hash(n.bval.int) + of JString: + result = hash(n.str) + of JNull: + result = Hash(0) + +proc hash*(n: OrderedTable[string, JsonNode]): Hash = + for key, val in n: + result = result xor (hash(key) !& hash(val)) + result = !$result + +proc len*(n: JsonNode): int = + ## If `n` is a `JArray`, it returns the number of elements. + ## If `n` is a `JObject`, it returns the number of pairs. + ## Else it returns 0. + case n.kind + of JArray: result = n.elems.len + of JObject: result = n.fields.len + else: discard + +proc `[]`*(node: JsonNode, name: string): JsonNode {.inline.} = + ## Gets a field from a `JObject`, which must not be nil. + ## If the value at `name` does not exist, raises KeyError. + assert(not isNil(node)) + assert(node.kind == JObject) + when defined(nimJsonGet): + if not node.fields.hasKey(name): return nil + result = node.fields[name] + +proc `[]`*(node: JsonNode, index: int): JsonNode {.inline.} = + ## Gets the node at `index` in an Array. Result is undefined if `index` + ## is out of bounds, but as long as array bound checks are enabled it will + ## result in an exception. + assert(not isNil(node)) + assert(node.kind == JArray) + return node.elems[index] + +proc hasKey*(node: JsonNode, key: string): bool = + ## Checks if `key` exists in `node`. + assert(node.kind == JObject) + result = node.fields.hasKey(key) + +proc contains*(node: JsonNode, key: string): bool = + ## Checks if `key` exists in `node`. + assert(node.kind == JObject) + node.fields.hasKey(key) + +proc contains*(node: JsonNode, val: JsonNode): bool = + ## Checks if `val` exists in array `node`. + assert(node.kind == JArray) + find(node.elems, val) >= 0 + +proc `{}`*(node: JsonNode, keys: varargs[string]): JsonNode = + ## Traverses the node and gets the given value. If any of the + ## keys do not exist, returns ``nil``. Also returns ``nil`` if one of the + ## intermediate data structures is not an object. + ## + ## This proc can be used to create tree structures on the + ## fly (sometimes called `autovivification`:idx:): + ## + runnableExamples: + var myjson = %* {"parent": {"child": {"grandchild": 1}}} + doAssert myjson{"parent", "child", "grandchild"} == newJInt(1) + + result = node + for key in keys: + if isNil(result) or result.kind != JObject: + return nil + result = result.fields.getOrDefault(key) + +proc `{}`*(node: JsonNode, index: varargs[int]): JsonNode = + ## Traverses the node and gets the given value. If any of the + ## indexes do not exist, returns ``nil``. Also returns ``nil`` if one of the + ## intermediate data structures is not an array. + result = node + for i in index: + if isNil(result) or result.kind != JArray or i >= node.len: + return nil + result = result.elems[i] + +proc getOrDefault*(node: JsonNode, key: string): JsonNode = + ## Gets a field from a `node`. If `node` is nil or not an object or + ## value at `key` does not exist, returns nil + if not isNil(node) and node.kind == JObject: + result = node.fields.getOrDefault(key) + +proc `{}`*(node: JsonNode, key: string): JsonNode = + ## Gets a field from a `node`. If `node` is nil or not an object or + ## value at `key` does not exist, returns nil + node.getOrDefault(key) + +proc `{}=`*(node: JsonNode, keys: varargs[string], value: JsonNode) = + ## Traverses the node and tries to set the value at the given location + ## to ``value``. If any of the keys are missing, they are added. + var node = node + for i in 0..(keys.len-2): + if not node.hasKey(keys[i]): + node[keys[i]] = newJObject() + node = node[keys[i]] + node[keys[keys.len-1]] = value + +proc delete*(obj: JsonNode, key: string) = + ## Deletes ``obj[key]``. + assert(obj.kind == JObject) + if not obj.fields.hasKey(key): + raise newException(KeyError, "key not in object") + obj.fields.del(key) + +proc copy*(p: JsonNode): JsonNode = + ## Performs a deep copy of `a`. + case p.kind + of JString: + result = newJString(p.str) + result.isUnquoted = p.isUnquoted + of JInt: + result = newJInt(p.num) + of JFloat: + result = newJFloat(p.fnum) + of JBool: + result = newJBool(p.bval) + of JNull: + result = newJNull() + of JObject: + result = newJObject() + for key, val in pairs(p.fields): + result.fields[key] = copy(val) + of JArray: + result = newJArray() + for i in items(p.elems): + result.elems.add(copy(i)) + +# ------------- pretty printing ---------------------------------------------- + +proc indent(s: var string, i: int) = + s.add(spaces(i)) + +proc newIndent(curr, indent: int, ml: bool): int = + if ml: return curr + indent + else: return indent + +proc nl(s: var string, ml: bool) = + s.add(if ml: "\n" else: " ") + +proc escapeJsonUnquoted*(s: string; result: var string) = + ## Converts a string `s` to its JSON representation without quotes. + ## Appends to ``result``. + for c in s: + case c + of '\L': result.add("\\n") + of '\b': result.add("\\b") + of '\f': result.add("\\f") + of '\t': result.add("\\t") + of '\v': result.add("\\u000b") + of '\r': result.add("\\r") + of '"': result.add("\\\"") + of '\0'..'\7': result.add("\\u000" & $ord(c)) + of '\14'..'\31': result.add("\\u00" & toHex(ord(c), 2)) + of '\\': result.add("\\\\") + else: result.add(c) + +proc escapeJsonUnquoted*(s: string): string = + ## Converts a string `s` to its JSON representation without quotes. + result = newStringOfCap(s.len + s.len shr 3) + escapeJsonUnquoted(s, result) + +proc escapeJson*(s: string; result: var string) = + ## Converts a string `s` to its JSON representation with quotes. + ## Appends to ``result``. + result.add("\"") + escapeJsonUnquoted(s, result) + result.add("\"") + +proc escapeJson*(s: string): string = + ## Converts a string `s` to its JSON representation with quotes. + result = newStringOfCap(s.len + s.len shr 3) + escapeJson(s, result) + +proc toPretty(result: var string, node: JsonNode, indent = 2, ml = true, + lstArr = false, currIndent = 0) = + case node.kind + of JObject: + if lstArr: result.indent(currIndent) # Indentation + if node.fields.len > 0: + result.add("{") + result.nl(ml) # New line + var i = 0 + for key, val in pairs(node.fields): + if i > 0: + result.add(",") + result.nl(ml) # New Line + inc i + # Need to indent more than { + result.indent(newIndent(currIndent, indent, ml)) + escapeJson(key, result) + result.add(": ") + toPretty(result, val, indent, ml, false, + newIndent(currIndent, indent, ml)) + result.nl(ml) + result.indent(currIndent) # indent the same as { + result.add("}") + else: + result.add("{}") + of JString: + if lstArr: result.indent(currIndent) + if node.isUnquoted: + result.add node.str + else: + escapeJson(node.str, result) + of JInt: + if lstArr: result.indent(currIndent) + when defined(js): result.add($node.num) + else: result.addInt(node.num) + of JFloat: + if lstArr: result.indent(currIndent) + # Fixme: implement new system.add ops for the JS target + when defined(js): result.add($node.fnum) + else: result.addFloat(node.fnum) + of JBool: + if lstArr: result.indent(currIndent) + result.add(if node.bval: "true" else: "false") + of JArray: + if lstArr: result.indent(currIndent) + if len(node.elems) != 0: + result.add("[") + result.nl(ml) + for i in 0..len(node.elems)-1: + if i > 0: + result.add(",") + result.nl(ml) # New Line + toPretty(result, node.elems[i], indent, ml, + true, newIndent(currIndent, indent, ml)) + result.nl(ml) + result.indent(currIndent) + result.add("]") + else: result.add("[]") + of JNull: + if lstArr: result.indent(currIndent) + result.add("null") + +proc pretty*(node: JsonNode, indent = 2): string = + ## Returns a JSON Representation of `node`, with indentation and + ## on multiple lines. + ## + ## Similar to prettyprint in Python. + runnableExamples: + let j = %* {"name": "Isaac", "books": ["Robot Dreams"], + "details": {"age": 35, "pi": 3.1415}} + doAssert pretty(j) == """ +{ + "name": "Isaac", + "books": [ + "Robot Dreams" + ], + "details": { + "age": 35, + "pi": 3.1415 + } +}""" + result = "" + toPretty(result, node, indent) + +proc toUgly*(result: var string, node: JsonNode) = + ## Converts `node` to its JSON Representation, without + ## regard for human readability. Meant to improve ``$`` string + ## conversion performance. + ## + ## JSON representation is stored in the passed `result` + ## + ## This provides higher efficiency than the ``pretty`` procedure as it + ## does **not** attempt to format the resulting JSON to make it human readable. + var comma = false + case node.kind: + of JArray: + result.add "[" + for child in node.elems: + if comma: result.add "," + else: comma = true + result.toUgly child + result.add "]" + of JObject: + result.add "{" + for key, value in pairs(node.fields): + if comma: result.add "," + else: comma = true + key.escapeJson(result) + result.add ":" + result.toUgly value + result.add "}" + of JString: + if node.isUnquoted: + result.add node.str + else: + node.str.escapeJson(result) + of JInt: + when defined(js): result.add($node.num) + else: result.addInt(node.num) + of JFloat: + when defined(js): result.add($node.fnum) + else: result.addFloat(node.fnum) + of JBool: + result.add(if node.bval: "true" else: "false") + of JNull: + result.add "null" + +proc `$`*(node: JsonNode): string = + ## Converts `node` to its JSON Representation on one line. + result = newStringOfCap(node.len shl 1) + toUgly(result, node) + +iterator items*(node: JsonNode): JsonNode = + ## Iterator for the items of `node`. `node` has to be a JArray. + assert node.kind == JArray, ": items() can not iterate a JsonNode of kind " & $node.kind + for i in items(node.elems): + yield i + +iterator mitems*(node: var JsonNode): var JsonNode = + ## Iterator for the items of `node`. `node` has to be a JArray. Items can be + ## modified. + assert node.kind == JArray, ": mitems() can not iterate a JsonNode of kind " & $node.kind + for i in mitems(node.elems): + yield i + +iterator pairs*(node: JsonNode): tuple[key: string, val: JsonNode] = + ## Iterator for the child elements of `node`. `node` has to be a JObject. + assert node.kind == JObject, ": pairs() can not iterate a JsonNode of kind " & $node.kind + for key, val in pairs(node.fields): + yield (key, val) + +iterator keys*(node: JsonNode): string = + ## Iterator for the keys in `node`. `node` has to be a JObject. + assert node.kind == JObject, ": keys() can not iterate a JsonNode of kind " & $node.kind + for key in node.fields.keys: + yield key + +iterator mpairs*(node: var JsonNode): tuple[key: string, val: var JsonNode] = + ## Iterator for the child elements of `node`. `node` has to be a JObject. + ## Values can be modified + assert node.kind == JObject, ": mpairs() can not iterate a JsonNode of kind " & $node.kind + for key, val in mpairs(node.fields): + yield (key, val) + +proc parseJson(p: var JsonParser; rawIntegers, rawFloats: bool): JsonNode = + ## Parses JSON from a JSON Parser `p`. + case p.tok + of tkString: + # we capture 'p.a' here, so we need to give it a fresh buffer afterwards: + result = newJStringMove(p.a) + p.a = "" + discard getTok(p) + of tkInt: + if rawIntegers: + result = newJRawNumber(p.a) + else: + try: + result = newJInt(parseBiggestInt(p.a)) + except ValueError: + result = newJRawNumber(p.a) + discard getTok(p) + of tkFloat: + if rawFloats: + result = newJRawNumber(p.a) + else: + try: + result = newJFloat(parseFloat(p.a)) + except ValueError: + result = newJRawNumber(p.a) + discard getTok(p) + of tkTrue: + result = newJBool(true) + discard getTok(p) + of tkFalse: + result = newJBool(false) + discard getTok(p) + of tkNull: + result = newJNull() + discard getTok(p) + of tkCurlyLe: + result = newJObject() + discard getTok(p) + while p.tok != tkCurlyRi: + if p.tok != tkString: + raiseParseErr(p, "string literal as key") + var key = p.a + discard getTok(p) + eat(p, tkColon) + var val = parseJson(p, rawIntegers, rawFloats) + result[key] = val + if p.tok != tkComma: break + discard getTok(p) + # Raise `JsonParsingError` for a trailing comma, unlike `std/json` + if p.tok == tkCurlyRi: + raiseParseErr(p, "found trailing comma. } or key/value pair") + eat(p, tkCurlyRi) + of tkBracketLe: + result = newJArray() + discard getTok(p) + while p.tok != tkBracketRi: + result.add(parseJson(p, rawIntegers, rawFloats)) + if p.tok != tkComma: break + discard getTok(p) + # Raise `JsonParsingError` for a trailing comma, unlike `std/json` + if p.tok == tkBracketRi: + raiseParseErr(p, "found trailing comma. ] or array item") + eat(p, tkBracketRi) + of tkError, tkCurlyRi, tkBracketRi, tkColon, tkComma, tkEof: + raiseParseErr(p, "{") + +iterator parseJsonFragments*(s: Stream, filename: string = ""; rawIntegers = false, rawFloats = false): JsonNode = + ## Parses from a stream `s` into `JsonNodes`. `filename` is only needed + ## for nice error messages. + ## The JSON fragments are separated by whitespace. This can be substantially + ## faster than the comparable loop + ## ``for x in splitWhitespace(s): yield parseJson(x)``. + ## This closes the stream `s` after it's done. + ## If `rawIntegers` is true, integer literals will not be converted to a `JInt` + ## field but kept as raw numbers via `JString`. + ## If `rawFloats` is true, floating point literals will not be converted to a `JFloat` + ## field but kept as raw numbers via `JString`. + var p: JsonParser + p.open(s, filename) + try: + discard getTok(p) # read first token + while p.tok != tkEof: + yield p.parseJson(rawIntegers, rawFloats) + finally: + p.close() + +proc parseJson*(s: Stream, filename: string = ""; rawIntegers = false, rawFloats = false): JsonNode = + ## Parses from a stream `s` into a `JsonNode`. `filename` is only needed + ## for nice error messages. + ## If `s` contains extra data, it will raise `JsonParsingError`. + ## This closes the stream `s` after it's done. + ## If `rawIntegers` is true, integer literals will not be converted to a `JInt` + ## field but kept as raw numbers via `JString`. + ## If `rawFloats` is true, floating point literals will not be converted to a `JFloat` + ## field but kept as raw numbers via `JString`. + var p: JsonParser + p.open(s, filename) + try: + discard getTok(p) # read first token + result = p.parseJson(rawIntegers, rawFloats) + eat(p, tkEof) # check if there is no extra data + finally: + p.close() + +when defined(js): + from math import `mod` + type + JSObject = object + + proc parseNativeJson(x: cstring): JSObject {.importc: "JSON.parse".} + + proc getVarType(x: JSObject): JsonNodeKind = + result = JNull + proc getProtoName(y: JSObject): cstring + {.importc: "Object.prototype.toString.call".} + case $getProtoName(x) # TODO: Implicit returns fail here. + of "[object Array]": return JArray + of "[object Object]": return JObject + of "[object Number]": + if cast[float](x) mod 1.0 == 0: + return JInt + else: + return JFloat + of "[object Boolean]": return JBool + of "[object Null]": return JNull + of "[object String]": return JString + else: assert false + + proc len(x: JSObject): int = + assert x.getVarType == JArray + asm """ + `result` = `x`.length; + """ + + proc `[]`(x: JSObject, y: string): JSObject = + assert x.getVarType == JObject + asm """ + `result` = `x`[`y`]; + """ + + proc `[]`(x: JSObject, y: int): JSObject = + assert x.getVarType == JArray + asm """ + `result` = `x`[`y`]; + """ + + proc convertObject(x: JSObject): JsonNode = + case getVarType(x) + of JArray: + result = newJArray() + for i in 0 ..< x.len: + result.add(x[i].convertObject()) + of JObject: + result = newJObject() + asm """for (var property in `x`) { + if (`x`.hasOwnProperty(property)) { + """ + var nimProperty: cstring + var nimValue: JSObject + asm "`nimProperty` = property; `nimValue` = `x`[property];" + result[$nimProperty] = nimValue.convertObject() + asm "}}" + of JInt: + result = newJInt(cast[int](x)) + of JFloat: + result = newJFloat(cast[float](x)) + of JString: + # Dunno what to do with isUnquoted here + result = newJString($cast[cstring](x)) + of JBool: + result = newJBool(cast[bool](x)) + of JNull: + result = newJNull() + + proc parseJson*(buffer: string): JsonNode = + when nimvm: + return parseJson(newStringStream(buffer), "input") + else: + return parseNativeJson(buffer).convertObject() + +else: + proc parseJson*(buffer: string; rawIntegers = false, rawFloats = false): JsonNode = + ## Parses JSON from `buffer`. + ## If `buffer` contains extra data, it will raise `JsonParsingError`. + ## If `rawIntegers` is true, integer literals will not be converted to a `JInt` + ## field but kept as raw numbers via `JString`. + ## If `rawFloats` is true, floating point literals will not be converted to a `JFloat` + ## field but kept as raw numbers via `JString`. + result = parseJson(newStringStream(buffer), "input", rawIntegers, rawFloats) + + proc parseFile*(filename: string): JsonNode = + ## Parses `file` into a `JsonNode`. + ## If `file` contains extra data, it will raise `JsonParsingError`. + var stream = newFileStream(filename, fmRead) + if stream == nil: + raise newException(IOError, "cannot read from file: " & filename) + result = parseJson(stream, filename, rawIntegers=false, rawFloats=false) + +# -- Json deserialiser. -- + +template verifyJsonKind(node: JsonNode, kinds: set[JsonNodeKind], + ast: string) = + if node == nil: + raise newException(KeyError, "key not found: " & ast) + elif node.kind notin kinds: + let msg = "Incorrect JSON kind. Wanted '$1' in '$2' but got '$3'." % [ + $kinds, + ast, + $node.kind + ] + raise newException(JsonKindError, msg) + +when defined(nimFixedForwardGeneric): + + macro isRefSkipDistinct*(arg: typed): untyped = + ## internal only, do not use + var impl = getTypeImpl(arg) + if impl.kind == nnkBracketExpr and impl[0].eqIdent("typeDesc"): + impl = getTypeImpl(impl[1]) + while impl.kind == nnkDistinctTy: + impl = getTypeImpl(impl[0]) + result = newLit(impl.kind == nnkRefTy) + + # The following forward declarations don't work in older versions of Nim + + # forward declare all initFromJson + + proc initFromJson(dst: var string; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson(dst: var bool; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson(dst: var JsonNode; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T: SomeInteger](dst: var T; jsonNode: JsonNode, jsonPath: var string) + proc initFromJson[T: SomeFloat](dst: var T; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T: enum](dst: var T; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T](dst: var seq[T]; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[S, T](dst: var array[S, T]; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T](dst: var Table[string, T]; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T](dst: var OrderedTable[string, T]; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T](dst: var ref T; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T](dst: var Option[T]; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T: distinct](dst: var T; jsonNode: JsonNode; jsonPath: var string) + proc initFromJson[T: object|tuple](dst: var T; jsonNode: JsonNode; jsonPath: var string) + + # initFromJson definitions + + proc initFromJson(dst: var string; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JString, JNull}, jsonPath) + # since strings don't have a nil state anymore, this mapping of + # JNull to the default string is questionable. `none(string)` and + # `some("")` have the same potentional json value `JNull`. + if jsonNode.kind == JNull: + dst = "" + else: + dst = jsonNode.str + + proc initFromJson(dst: var bool; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JBool}, jsonPath) + dst = jsonNode.bval + + proc initFromJson(dst: var JsonNode; jsonNode: JsonNode; jsonPath: var string) = + if jsonNode == nil: + raise newException(KeyError, "key not found: " & jsonPath) + dst = jsonNode.copy + + proc initFromJson[T: SomeInteger](dst: var T; jsonNode: JsonNode, jsonPath: var string) = + when T is uint|uint64 or (not defined(js) and int.sizeof == 4): + verifyJsonKind(jsonNode, {JInt, JString}, jsonPath) + case jsonNode.kind + of JString: + let x = parseBiggestUInt(jsonNode.str) + dst = cast[T](x) + else: + dst = T(jsonNode.num) + else: + verifyJsonKind(jsonNode, {JInt}, jsonPath) + dst = cast[T](jsonNode.num) + + proc initFromJson[T: SomeFloat](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JInt, JFloat}, jsonPath) + if jsonNode.kind == JFloat: + dst = T(jsonNode.fnum) + else: + dst = T(jsonNode.num) + + proc initFromJson[T: enum](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JString}, jsonPath) + dst = parseEnum[T](jsonNode.getStr) + + proc initFromJson[T](dst: var seq[T]; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JArray}, jsonPath) + dst.setLen jsonNode.len + let orignalJsonPathLen = jsonPath.len + for i in 0 ..< jsonNode.len: + jsonPath.add '[' + jsonPath.addInt i + jsonPath.add ']' + initFromJson(dst[i], jsonNode[i], jsonPath) + jsonPath.setLen orignalJsonPathLen + + proc initFromJson[S,T](dst: var array[S,T]; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JArray}, jsonPath) + let originalJsonPathLen = jsonPath.len + for i in 0 ..< jsonNode.len: + jsonPath.add '[' + jsonPath.addInt i + jsonPath.add ']' + initFromJson(dst[i], jsonNode[i], jsonPath) + jsonPath.setLen originalJsonPathLen + + proc initFromJson[T](dst: var Table[string,T]; jsonNode: JsonNode; jsonPath: var string) = + dst = initTable[string, T]() + verifyJsonKind(jsonNode, {JObject}, jsonPath) + let originalJsonPathLen = jsonPath.len + for key in keys(jsonNode.fields): + jsonPath.add '.' + jsonPath.add key + initFromJson(mgetOrPut(dst, key, default(T)), jsonNode[key], jsonPath) + jsonPath.setLen originalJsonPathLen + + proc initFromJson[T](dst: var OrderedTable[string,T]; jsonNode: JsonNode; jsonPath: var string) = + dst = initOrderedTable[string,T]() + verifyJsonKind(jsonNode, {JObject}, jsonPath) + let originalJsonPathLen = jsonPath.len + for key in keys(jsonNode.fields): + jsonPath.add '.' + jsonPath.add key + initFromJson(mgetOrPut(dst, key, default(T)), jsonNode[key], jsonPath) + jsonPath.setLen originalJsonPathLen + + proc initFromJson[T](dst: var ref T; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JObject, JNull}, jsonPath) + if jsonNode.kind == JNull: + dst = nil + else: + dst = new(T) + initFromJson(dst[], jsonNode, jsonPath) + + proc initFromJson[T](dst: var Option[T]; jsonNode: JsonNode; jsonPath: var string) = + if jsonNode != nil and jsonNode.kind != JNull: + when T is ref: + dst = some(new(T)) + else: + dst = some(default(T)) + initFromJson(dst.get, jsonNode, jsonPath) + + macro assignDistinctImpl[T: distinct](dst: var T;jsonNode: JsonNode; jsonPath: var string) = + let typInst = getTypeInst(dst) + let typImpl = getTypeImpl(dst) + let baseTyp = typImpl[0] + + result = quote do: + when nimvm: + # workaround #12282 + var tmp: `baseTyp` + initFromJson( tmp, `jsonNode`, `jsonPath`) + `dst` = `typInst`(tmp) + else: + initFromJson( `baseTyp`(`dst`), `jsonNode`, `jsonPath`) + + proc initFromJson[T: distinct](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + assignDistinctImpl(dst, jsonNode, jsonPath) + + proc detectIncompatibleType(typeExpr, lineinfoNode: NimNode): void = + if typeExpr.kind == nnkTupleConstr: + error("Use a named tuple instead of: " & typeExpr.repr, lineinfoNode) + + proc foldObjectBody(dst, typeNode, tmpSym, jsonNode, jsonPath, originalJsonPathLen: NimNode): void {.compileTime.} = + case typeNode.kind + of nnkEmpty: + discard + of nnkRecList, nnkTupleTy: + for it in typeNode: + foldObjectBody(dst, it, tmpSym, jsonNode, jsonPath, originalJsonPathLen) + + of nnkIdentDefs: + typeNode.expectLen 3 + let fieldSym = typeNode[0] + let fieldNameLit = newLit(fieldSym.strVal) + let fieldPathLit = newLit("." & fieldSym.strVal) + let fieldType = typeNode[1] + + # Detecting incompatiple tuple types in `assignObjectImpl` only + # would be much cleaner, but the ast for tuple types does not + # contain usable type information. + detectIncompatibleType(fieldType, fieldSym) + + dst.add quote do: + jsonPath.add `fieldPathLit` + when nimvm: + when isRefSkipDistinct(`tmpSym`.`fieldSym`): + # workaround #12489 + var tmp: `fieldType` + initFromJson(tmp, getOrDefault(`jsonNode`,`fieldNameLit`), `jsonPath`) + `tmpSym`.`fieldSym` = tmp + else: + initFromJson(`tmpSym`.`fieldSym`, getOrDefault(`jsonNode`,`fieldNameLit`), `jsonPath`) + else: + initFromJson(`tmpSym`.`fieldSym`, getOrDefault(`jsonNode`,`fieldNameLit`), `jsonPath`) + jsonPath.setLen `originalJsonPathLen` + + of nnkRecCase: + let kindSym = typeNode[0][0] + let kindNameLit = newLit(kindSym.strVal) + let kindPathLit = newLit("." & kindSym.strVal) + let kindType = typeNode[0][1] + let kindOffsetLit = newLit(uint(getOffset(kindSym))) + dst.add quote do: + var kindTmp: `kindType` + jsonPath.add `kindPathLit` + initFromJson(kindTmp, `jsonNode`[`kindNameLit`], `jsonPath`) + jsonPath.setLen `originalJsonPathLen` + when defined js: + `tmpSym`.`kindSym` = kindTmp + else: + when nimvm: + `tmpSym`.`kindSym` = kindTmp + else: + # fuck it, assign kind field anyway + ((cast[ptr `kindType`](cast[uint](`tmpSym`.addr) + `kindOffsetLit`))[]) = kindTmp + dst.add nnkCaseStmt.newTree(nnkDotExpr.newTree(tmpSym, kindSym)) + for i in 1 ..< typeNode.len: + foldObjectBody(dst, typeNode[i], tmpSym, jsonNode, jsonPath, originalJsonPathLen) + + of nnkOfBranch, nnkElse: + let ofBranch = newNimNode(typeNode.kind) + for i in 0 ..< typeNode.len-1: + ofBranch.add copyNimTree(typeNode[i]) + let dstInner = newNimNode(nnkStmtListExpr) + foldObjectBody(dstInner, typeNode[^1], tmpSym, jsonNode, jsonPath, originalJsonPathLen) + # resOuter now contains the inner stmtList + ofBranch.add dstInner + dst[^1].expectKind nnkCaseStmt + dst[^1].add ofBranch + + of nnkObjectTy: + typeNode[0].expectKind nnkEmpty + typeNode[1].expectKind {nnkEmpty, nnkOfInherit} + if typeNode[1].kind == nnkOfInherit: + let base = typeNode[1][0] + var impl = getTypeImpl(base) + while impl.kind in {nnkRefTy, nnkPtrTy}: + impl = getTypeImpl(impl[0]) + foldObjectBody(dst, impl, tmpSym, jsonNode, jsonPath, originalJsonPathLen) + let body = typeNode[2] + foldObjectBody(dst, body, tmpSym, jsonNode, jsonPath, originalJsonPathLen) + + else: + error("unhandled kind: " & $typeNode.kind, typeNode) + + macro assignObjectImpl[T](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + let typeSym = getTypeInst(dst) + let originalJsonPathLen = genSym(nskLet, "originalJsonPathLen") + result = newStmtList() + result.add quote do: + let `originalJsonPathLen` = len(`jsonPath`) + if typeSym.kind in {nnkTupleTy, nnkTupleConstr}: + # both, `dst` and `typeSym` don't have good lineinfo. But nothing + # else is available here. + detectIncompatibleType(typeSym, dst) + foldObjectBody(result, typeSym, dst, jsonNode, jsonPath, originalJsonPathLen) + else: + foldObjectBody(result, typeSym.getTypeImpl, dst, jsonNode, jsonPath, originalJsonPathLen) + + proc initFromJson[T: object|tuple](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + assignObjectImpl(dst, jsonNode, jsonPath) + + proc to*[T](node: JsonNode, t: typedesc[T]): T = + ## `Unmarshals`:idx: the specified node into the object type specified. + ## + ## Known limitations: + ## + ## * Heterogeneous arrays are not supported. + ## * Sets in object variants are not supported. + ## * Not nil annotations are not supported. + ## + runnableExamples: + let jsonNode = parseJson(""" + { + "person": { + "name": "Nimmer", + "age": 21 + }, + "list": [1, 2, 3, 4] + } + """) + + type + Person = object + name: string + age: int + + Data = object + person: Person + list: seq[int] + + var data = to(jsonNode, Data) + doAssert data.person.name == "Nimmer" + doAssert data.person.age == 21 + doAssert data.list == @[1, 2, 3, 4] + + var jsonPath = "" + initFromJson(result, node, jsonPath) + +when false: + import os + var s = newFileStream(paramStr(1), fmRead) + if s == nil: quit("cannot open the file" & paramStr(1)) + var x: JsonParser + open(x, s, paramStr(1)) + while true: + next(x) + case x.kind + of jsonError: + Echo(x.errorMsg()) + break + of jsonEof: break + of jsonString, jsonInt, jsonFloat: echo(x.str) + of jsonTrue: echo("!TRUE") + of jsonFalse: echo("!FALSE") + of jsonNull: echo("!NULL") + of jsonObjectStart: echo("{") + of jsonObjectEnd: echo("}") + of jsonArrayStart: echo("[") + of jsonArrayEnd: echo("]") + + close(x) + +# { "json": 5 } +# To get that we shall use, obj["json"] diff --git a/src/patched_stdlib/parsejson.nim b/src/patched_stdlib/parsejson.nim new file mode 100644 index 00000000..03e70a5b --- /dev/null +++ b/src/patched_stdlib/parsejson.nim @@ -0,0 +1,507 @@ +# This file is minimally adapted from this version in the Nim standard library: +# https://github.com/nim-lang/Nim/blob/8e3a349561fb/lib/pure/parsejson.nim +# The standard library version is lenient: it silently allows line comments +# with `//`, and long comments with `/* */`. +# The below version is stricter: it raises a `JsonParsingError` for such +# comments. + +# Copyright (c) 2018 Nim contributors + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +## This module implements a json parser. It is used +## and exported by the ``json`` standard library +## module, but can also be used in its own right. + +import + strutils, lexbase, streams, unicode + +type + JsonEventKind* = enum ## enumeration of all events that may occur when parsing + jsonError, ## an error occurred during parsing + jsonEof, ## end of file reached + jsonString, ## a string literal + jsonInt, ## an integer literal + jsonFloat, ## a float literal + jsonTrue, ## the value ``true`` + jsonFalse, ## the value ``false`` + jsonNull, ## the value ``null`` + jsonObjectStart, ## start of an object: the ``{`` token + jsonObjectEnd, ## end of an object: the ``}`` token + jsonArrayStart, ## start of an array: the ``[`` token + jsonArrayEnd ## end of an array: the ``]`` token + + TokKind* = enum # must be synchronized with TJsonEventKind! + tkError, + tkEof, + tkString, + tkInt, + tkFloat, + tkTrue, + tkFalse, + tkNull, + tkCurlyLe, + tkCurlyRi, + tkBracketLe, + tkBracketRi, + tkColon, + tkComma + + JsonError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errInvalidToken, ## invalid token + errStringExpected, ## string expected + errColonExpected, ## ``:`` expected + errCommaExpected, ## ``,`` expected + errBracketRiExpected, ## ``]`` expected + errCurlyRiExpected, ## ``}`` expected + errQuoteExpected, ## ``"`` or ``'`` expected + errEofExpected, ## EOF expected + errExprExpected ## expr expected + + ParserState = enum + stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma, + stateExpectObjectComma, stateExpectColon, stateExpectValue + + JsonParser* = object of BaseLexer ## the parser object. + a*: string + tok*: TokKind + kind: JsonEventKind + err: JsonError + state: seq[ParserState] + filename: string + rawStringLiterals: bool + + JsonKindError* = object of ValueError ## raised by the ``to`` macro if the + ## JSON kind is incorrect. + JsonParsingError* = object of ValueError ## is raised for a JSON error + +const + errorMessages*: array[JsonError, string] = [ + "no error", + "invalid token", + "string expected", + "':' expected", + "',' expected", + "']' expected", + "'}' expected", + "'\"' or \"'\" expected", + "EOF expected", + "expression expected" + ] + tokToStr: array[TokKind, string] = [ + "invalid token", + "EOF", + "string literal", + "int literal", + "float literal", + "true", + "false", + "null", + "{", "}", "[", "]", ":", "," + ] + +proc open*(my: var JsonParser, input: Stream, filename: string; + rawStringLiterals = false) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. If `rawStringLiterals` is true, string literals + ## are kept with their surrounding quotes and escape sequences in them are + ## left untouched too. + lexbase.open(my, input) + my.filename = filename + my.state = @[stateStart] + my.kind = jsonError + my.a = "" + my.rawStringLiterals = rawStringLiterals + +proc close*(my: var JsonParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +proc str*(my: JsonParser): string {.inline.} = + ## returns the character data for the events: ``jsonInt``, ``jsonFloat``, + ## ``jsonString`` + assert(my.kind in {jsonInt, jsonFloat, jsonString}) + return my.a + +proc getInt*(my: JsonParser): BiggestInt {.inline.} = + ## returns the number for the event: ``jsonInt`` + assert(my.kind == jsonInt) + return parseBiggestInt(my.a) + +proc getFloat*(my: JsonParser): float {.inline.} = + ## returns the number for the event: ``jsonFloat`` + assert(my.kind == jsonFloat) + return parseFloat(my.a) + +proc kind*(my: JsonParser): JsonEventKind {.inline.} = + ## returns the current event type for the JSON parser + return my.kind + +proc getColumn*(my: JsonParser): int {.inline.} = + ## get the current column the parser has arrived at. + result = getColNumber(my, my.bufpos) + +proc getLine*(my: JsonParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.lineNumber + +proc getFilename*(my: JsonParser): string {.inline.} = + ## get the filename of the file that the parser processes. + result = my.filename + +proc errorMsg*(my: JsonParser): string = + ## returns a helpful error message for the event ``jsonError`` + assert(my.kind == jsonError) + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: JsonParser, e: string): string = + ## returns an error message "`e` expected" in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), e & " expected"] + +proc handleHexChar(c: char, x: var int): bool = + result = true # Success + case c + of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) + of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) + of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) + else: result = false # error + +proc parseEscapedUTF16*(buf: cstring, pos: var int): int = + result = 0 + #UTF-16 escape is always 4 bytes. + for _ in 0..3: + if handleHexChar(buf[pos], result): + inc(pos) + else: + return -1 + +proc parseString(my: var JsonParser): TokKind = + result = tkString + var pos = my.bufpos + 1 + if my.rawStringLiterals: + add(my.a, '"') + while true: + case my.buf[pos] + of '\0': + my.err = errQuoteExpected + result = tkError + break + of '"': + if my.rawStringLiterals: + add(my.a, '"') + inc(pos) + break + of '\\': + if my.rawStringLiterals: + add(my.a, '\\') + case my.buf[pos+1] + of '\\', '"', '\'', '/': + add(my.a, my.buf[pos+1]) + inc(pos, 2) + of 'b': + add(my.a, '\b') + inc(pos, 2) + of 'f': + add(my.a, '\f') + inc(pos, 2) + of 'n': + add(my.a, '\L') + inc(pos, 2) + of 'r': + add(my.a, '\C') + inc(pos, 2) + of 't': + add(my.a, '\t') + inc(pos, 2) + of 'v': + add(my.a, '\v') + inc(pos, 2) + of 'u': + if my.rawStringLiterals: + add(my.a, 'u') + inc(pos, 2) + var pos2 = pos + var r = parseEscapedUTF16(my.buf, pos) + if r < 0: + my.err = errInvalidToken + break + # Deal with surrogates + if (r and 0xfc00) == 0xd800: + if my.buf[pos] != '\\' or my.buf[pos+1] != 'u': + my.err = errInvalidToken + break + inc(pos, 2) + var s = parseEscapedUTF16(my.buf, pos) + if (s and 0xfc00) == 0xdc00 and s > 0: + r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) + else: + my.err = errInvalidToken + break + if my.rawStringLiterals: + let length = pos - pos2 + for i in 1 .. length: + if my.buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}: + add(my.a, my.buf[pos2]) + inc pos2 + else: + break + else: + add(my.a, toUTF8(Rune(r))) + else: + # don't bother with the error + add(my.a, my.buf[pos]) + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + add(my.a, '\c') + of '\L': + pos = lexbase.handleLF(my, pos) + add(my.a, '\L') + else: + add(my.a, my.buf[pos]) + inc(pos) + my.bufpos = pos # store back + +proc skip(my: var JsonParser) = + var pos = my.bufpos + while true: + case my.buf[pos] + # Raise `JsonParsingError` for comments with `//` or `/* */`, + # unlike `std/parsejson` + of ' ', '\t': + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + of '\L': + pos = lexbase.handleLF(my, pos) + else: + break + my.bufpos = pos + +proc parseNumber(my: var JsonParser) = + var pos = my.bufpos + if my.buf[pos] == '-': + add(my.a, '-') + inc(pos) + if my.buf[pos] == '.': + add(my.a, "0.") + inc(pos) + else: + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) + inc(pos) + if my.buf[pos] == '.': + add(my.a, '.') + inc(pos) + # digits after the dot: + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) + inc(pos) + if my.buf[pos] in {'E', 'e'}: + add(my.a, my.buf[pos]) + inc(pos) + if my.buf[pos] in {'+', '-'}: + add(my.a, my.buf[pos]) + inc(pos) + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) + inc(pos) + my.bufpos = pos + +proc parseName(my: var JsonParser) = + var pos = my.bufpos + if my.buf[pos] in IdentStartChars: + while my.buf[pos] in IdentChars: + add(my.a, my.buf[pos]) + inc(pos) + my.bufpos = pos + +proc getTok*(my: var JsonParser): TokKind = + setLen(my.a, 0) + skip(my) # skip whitespace + case my.buf[my.bufpos] + of '-', '.', '0'..'9': + parseNumber(my) + if {'.', 'e', 'E'} in my.a: + result = tkFloat + else: + result = tkInt + of '"': + result = parseString(my) + of '[': + inc(my.bufpos) + result = tkBracketLe + of '{': + inc(my.bufpos) + result = tkCurlyLe + of ']': + inc(my.bufpos) + result = tkBracketRi + of '}': + inc(my.bufpos) + result = tkCurlyRi + of ',': + inc(my.bufpos) + result = tkComma + of ':': + inc(my.bufpos) + result = tkColon + of '\0': + result = tkEof + of 'a'..'z', 'A'..'Z', '_': + parseName(my) + case my.a + of "null": result = tkNull + of "true": result = tkTrue + of "false": result = tkFalse + else: result = tkError + else: + inc(my.bufpos) + result = tkError + my.tok = result + + +proc next*(my: var JsonParser) = + ## retrieves the first/next event. This controls the parser. + var tk = getTok(my) + var i = my.state.len-1 + # the following code is a state machine. If we had proper coroutines, + # the code could be much simpler. + case my.state[i] + of stateEof: + if tk == tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateStart: + # tokens allowed? + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state[i] = stateEof # expect EOF next! + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateArray) # we expect any + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateObject: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectColon) + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectColon) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectColon) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateArray: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectArrayComma) # expect value next! + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectArrayComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() # pop stateExpectArrayComma + discard my.state.pop() # pop stateArray + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectObjectComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() # pop stateExpectObjectComma + discard my.state.pop() # pop stateObject + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateExpectColon: + case tk + of tkColon: + my.state[i] = stateExpectValue + next(my) + else: + my.kind = jsonError + my.err = errColonExpected + of stateExpectValue: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state[i] = stateExpectObjectComma + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateObject) + my.kind = jsonObjectStart + else: + my.kind = jsonError + my.err = errExprExpected + +proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} = + ## raises an `EJsonParsingError` exception. + raise newException(JsonParsingError, errorMsgExpected(p, msg)) + +proc eat*(p: var JsonParser, tok: TokKind) = + if p.tok == tok: discard getTok(p) + else: raiseParseErr(p, tokToStr[tok]) diff --git a/tests/all_tests.nim b/tests/all_tests.nim index 0cee2bb0..5e5dabe5 100644 --- a/tests/all_tests.nim +++ b/tests/all_tests.nim @@ -1,5 +1,6 @@ import "."/[ test_binary, + test_json, test_lint, test_probspecs, test_tracks, diff --git a/tests/test_json.nim b/tests/test_json.nim new file mode 100644 index 00000000..ce16780a --- /dev/null +++ b/tests/test_json.nim @@ -0,0 +1,129 @@ +## We have patched the stdlib modules of `std/json` and `std/parsejson` so that +## parsing the below cases now raises a `JsonParsingError`: +## - a trailing comma e.g. "[1, 2,]" +## - a line comment e.g. "[1, 2] // here is a line comment" +## - a (potentially) multi-line comment e.g. "[1, 2] /* here is a comment */" +## +## This module tests that our patched JSON parser works as expected. +import std/[json, strutils, unittest] + +template checkEachLineIsInvalidJson(s: string) = + # This must be a template, rather than a proc. With the latter, a failing test + # would correctly produce a non-zero exit code and an error message, but + # incorrectly print [OK] rather than [FAILED]. + for line in s.unindent().splitLines(): + expect JsonParsingError: + discard parseJson(line) + +proc testJsonParser = + suite "parsejson": + test "valid JSON: empty array": + check: + parseJson("[]") == newJArray() + + test "valid JSON: empty object": + check: + parseJson("{}") == newJObject() + + test "valid JSON: empty object surrounded by extra whitespace": + const s = """ + + {} + + """ + check: + parseJson(s) == newJObject() + + test "invalid JSON: empty string": + checkEachLineIsInvalidJson "" + + test "invalid JSON: fragments of valid JSON": + # This is one case where `jq` is bad as a JSON validator. `jq` is + # stream-oriented, so it exits with an exit code of 0 for e.g. + # `echo '[][]' | jq .` + # and even + # `echo '' | jq .` + checkEachLineIsInvalidJson """ + [][] + {}{} + {}[] + []{} + [] [] + {} {}""" + expect JsonParsingError: + discard parseJson("[]\n{}") + + test "invalid JSON: single comma": + checkEachLineIsInvalidJson "," + + test "invalid JSON: comma after open bracket/curly": + checkEachLineIsInvalidJson """ + [, + [ , + {, + { , + {",} + {"a,} + {"a",} + {"a":,} + {"a": ,} + { , """ + + test "invalid JSON: comma inside empty array/object": + checkEachLineIsInvalidJson """ + [,] + [, ] + {,} + {, }""" + + test "invalid JSON: trailing comma inside non-empty array": + # These lines raise only with the patched std/json + checkEachLineIsInvalidJson """ + [1,] + [1, ] + [1, 2,] + [1, 2, ]""" + + test "invalid JSON: trailing comma inside non-empty object": + # These lines raise only with the patched std/json + checkEachLineIsInvalidJson """ + {"a": 1,} + {"a": 1, "b": 2,}""" + + test "invalid JSON: trailing comma inside both object and array": + # These lines raise only with the patched std/json + checkEachLineIsInvalidJson """ + {"a": [1,],} + {"a": [1, 2,], "b": 3} + {"a": [1, 2,], "b": 3,}""" + + test "invalid JSON: a line comment": + # This raises only with the patched std/json + const s = """ + { + "a": 1, + "b": 2, // here is a line comment + "c": 3 + }""" + expect JsonParsingError: + discard parseJson(s) + + test "invalid JSON: a multi-line comment": + # This raises only with the patched std/json + const s = """ + { + "a": 1, + /* here is + a multi-line + comment */ + "b": 2, + "c": 3 + }""" + expect JsonParsingError: + discard parseJson(s) + +proc main = + testJsonParser() + +main() +{.used.}