From 80cf9cebf0320a66dd94948c1bc68acc08d9f640 Mon Sep 17 00:00:00 2001 From: Philippe Proulx Date: Mon, 28 Feb 2022 10:30:49 -0500 Subject: [PATCH] src/cpp-common: add bt2_common::parseJson() functions (listener mode) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the bt2_common::parseJson() functions in `parse-json.hpp`. Those functions wrap the file-internal `bt2_common::internal::JsonParser` class of which an instance can parse a single JSON value, calling specific methods of a JSON event listener as it processes. Internally, `bt2_common::internal::JsonParser` uses a string scanner (`bt2_common::StrScanner`). In searching for a simple JSON parsing solution, I could not find, as of this date, any project which satisfies the following requirements out of the box: * Is well-known, well documented, and well tested. * Has an MIT-compatible license. * Parses both unsigned and signed 64-bit integers (range -9,223,372,036,854,775,808 to 18,446,744,073,709,551,615). * Provides an exact text location (offset, line number, column number) on parsing error. * Provides an exact text location (offset, line number, column number) for each parsed value. I believe the text locations are essential as this JSON parser will be used to decode CTF2‑SPECRC‑4.0 [1] auxiliary and metadata streams: because Babeltrace 2 will be a reference implementation of CTF 2, it makes sense to make an effort to pinpoint the exact location of syntactic and semantic errors. More specifically: * JSON for Modern C++ (by Niels Lohmann) [2] doesn't support text location access, although there's a pending pull request (draft as of this date) to add such support [3]. * The exceptions of JsonCpp [4] don't contain a text location, only a message. * SimpleJSON [5] doesn't offer text location access and seems to be an archived project. * RapidJSON [6] doesn't offer text location access. 
* yajl [7] could offer some form of text location access (offset, at least) with yajl_get_bytes_consumed(), remembering the last offset on our side, although I don't know how nicely it would play with whitespaces. That being said, regarding integers, the `yajl_callbacks` structure [8] only contains a `yajl_integer` function pointer which receives a `long long` value (no direct 64-bit unsigned integer support). It's possible to set the `yajl_number` callback for any number, but the `yajl_double` callback gets disabled in that case, and the callback receives a string which needs further parsing on our side: this is pretty much what's implemented in `bt2_common::StrScanner` anyway. At this point I stopped searching as I already had a working and tested string scanner and, as you can see, without comments, `parse-json.hpp` is only 231 lines of effective code and satisfies all the requirements above. You can test bt2_common::parseJson() with a simple program like this: #include <iostream> #include <string> #include "parse-json.hpp" struct Printer { void onNull(const bt2_common::TextLoc&) { std::cout << "null\n"; } template <typename ValT> void onScalarVal(const ValT& val, const bt2_common::TextLoc&) { std::cout << val << '\n'; } void onArrayBegin(const bt2_common::TextLoc&) { std::cout << "[\n"; } void onArrayEnd(const bt2_common::TextLoc&) { std::cout << "]\n"; } void onObjBegin(const bt2_common::TextLoc&) { std::cout << "{\n"; } void onObjKey(const std::string& key, const bt2_common::TextLoc&) { std::cout << key << ": "; } void onObjEnd(const bt2_common::TextLoc&) { std::cout << "}\n"; } }; int main(const int, const char * const * const argv) { Printer printer; bt2_common::parseJson(argv[1], printer); } Then: $ ./test-parse-json 23 $ ./test-parse-json '"\u03c9 represents angular velocity"' $ ./test-parse-json '{"salut": [23, true, 42.4e-9, {"meow": null}]}' $ ./test-parse-json 18446744073709551615 $ ./test-parse-json -9223372036854775808 Also try some parsing errors: $ ./test-parse-json '{"salut": [false,
42.4e-9, "meow": null}]}' $ ./test-parse-json 18446744073709551616 $ ./test-parse-json -9223372036854775809 $ ./test-parse-json '"invalid \u8dkf codepoint"' [1]: https://diamon.org/ctf/files/CTF2-SPECRC-4.0.html [2]: https://github.com/nlohmann/json [3]: https://github.com/nlohmann/json/pull/3165 [4]: https://github.com/open-source-parsers/jsoncpp [5]: https://github.com/nbsdx/SimpleJSON [6]: https://rapidjson.org/ [7]: https://github.com/lloyd/yajl [8]: https://lloyd.github.io/yajl/yajl-2.1.0/structyajl__callbacks.html Signed-off-by: Philippe Proulx Change-Id: Id32c2b64723ca50b044369c424fe046c0a183cce Reviewed-on: https://review.lttng.org/c/babeltrace/+/7411
---
 src/cpp-common/Makefile.am    |   8 +-
 src/cpp-common/parse-json.hpp | 481 ++++++++++++++++++++++++++++++++++
 2 files changed, 488 insertions(+), 1 deletion(-)
 create mode 100644 src/cpp-common/parse-json.hpp

diff --git a/src/cpp-common/Makefile.am b/src/cpp-common/Makefile.am
index 592b90476..e9582c007 100644
--- a/src/cpp-common/Makefile.am
+++ b/src/cpp-common/Makefile.am
@@ -7,4 +7,10 @@ libcppcommon_la_SOURCES = \
 	text-parse-error.hpp text-parse-error.cpp \
 	str-scanner.hpp str-scanner.cpp
 
-EXTRA_DIST = bt2 optional.hpp string_view.hpp uuid-view.hpp nlohmann/json.hpp
+EXTRA_DIST = \
+	bt2 \
+	optional.hpp \
+	string_view.hpp \
+	uuid-view.hpp \
+	nlohmann/json.hpp \
+	parse-json.hpp
diff --git a/src/cpp-common/parse-json.hpp b/src/cpp-common/parse-json.hpp
new file mode 100644
index 000000000..3687bb3db
--- /dev/null
+++ b/src/cpp-common/parse-json.hpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2022 Philippe Proulx
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef BABELTRACE_CPP_COMMON_PARSE_JSON_HPP
+#define BABELTRACE_CPP_COMMON_PARSE_JSON_HPP
+
+#include <cstring>
+#include <string>
+#include <vector>
+#include <unordered_set>
+
+#include "common/assert.h"
+#include "str-scanner.hpp"
+#include "text-parse-error.hpp"
+
+namespace bt2_common {
+namespace internal {
+
+/*
+ * JSON text parser.
+ *
+ * This parser parses a single JSON value, calling the methods of a JSON
+ * event listener of type `ListenerT` for each JSON event.
+ *
+ * The requirements of `ListenerT` are the following public methods:
+ *
+ *     void onNull(const TextLoc&);
+ *     void onScalarVal(bool, const TextLoc&);
+ *     void onScalarVal(unsigned long long, const TextLoc&);
+ *     void onScalarVal(long long, const TextLoc&);
+ *     void onScalarVal(double, const TextLoc&);
+ *     void onScalarVal(const std::string&, const TextLoc&);
+ *     void onArrayBegin(const TextLoc&);
+ *     void onArrayEnd(const TextLoc&);
+ *     void onObjBegin(const TextLoc&);
+ *     void onObjKey(const std::string&, const TextLoc&);
+ *     void onObjEnd(const TextLoc&);
+ *
+ * The received text location always indicates the location of the
+ * _beginning_ of the text representing the corresponding JSON value.
+ *
+ * This parser honours the grammar of <https://www.json.org/>, not
+ * parsing special floating-point number tokens (`nan`, `inf`, and the
+ * rest) or C-style comments.
+ */
+template <typename ListenerT>
+class JsonParser final
+{
+public:
+    /*
+     * Builds a JSON text parser, wrapping a string between `begin`
+     * (included) and `end` (excluded), and parses it, calling the
+     * methods of the JSON event listener `listener`.
+     *
+     * Throws `TextParseError` when there's a parsing error, including
+     * when it can't fully parse the JSON string as a valid JSON value.
+     */
+    explicit JsonParser(const char *begin, const char *end, ListenerT& listener);
+
+private:
+    /*
+     * Parses the whole JSON string.
+     */
+    void _parse();
+
+    /*
+     * Expects a JSON value, throwing a text parse error if not found.
+     */
+    void _expectVal();
+
+    /*
+     * Tries to parse `null`, calling the event listener on success.
+     */
+    bool _tryParseNull();
+
+    /*
+     * Tries to parse `true` or `false`, calling the event listener on
+     * success.
+     */
+    bool _tryParseBool();
+
+    /*
+     * Tries to parse a JSON number, calling the event listener on
+     * success.
+     */
+    bool _tryParseNumber();
+
+    /*
+     * Tries to parse a JSON object key, calling the event listener on
+     * success.
+     */
+    bool _tryParseObjKey();
+
+    /*
+     * Tries to parse a JSON string, calling the event listener on
+     * success.
+     */
+    bool _tryParseStr();
+
+    /*
+     * Tries to parse a JSON array, calling the event listener on
+     * success.
+     */
+    bool _tryParseArray();
+
+    /*
+     * Tries to parse a JSON object, calling the event listener on
+     * success.
+     */
+    bool _tryParseObj();
+
+    /*
+     * Expects the specific token `token`, throwing a text parse error
+     * if not found.
+     */
+    void _expectToken(const char * const token)
+    {
+        if (!_mSs.tryScanToken(token)) {
+            std::ostringstream ss;
+
+            ss << "Expecting `" << token << "`.";
+            throw TextParseError {ss.str(), _mSs.loc()};
+        }
+    }
+
+    /*
+     * Calls StrScanner::tryScanLitStr() with the JSON-specific escape
+     * sequence starting characters.
+     */
+    const std::string *_tryScanLitStr()
+    {
+        return _mSs.tryScanLitStr("/bfnrtu");
+    }
+
+    /*
+     * Returns whether or not the string scanner isn't done (so that
+     * `*end` is never read) and its current character looks like the
+     * beginning of the fractional/exponent part of a constant real number.
+     */
+    bool _ssCurCharLikeConstRealFracOrExp() const noexcept
+    {
+        return !_mSs.isDone() && (*_mSs.at() == '.' || *_mSs.at() == 'E' || *_mSs.at() == 'e');
+    }
+
+private:
+    /* Underlying string scanner */
+    StrScanner _mSs;
+
+    /* JSON event listener */
+    ListenerT *_mListener;
+
+    /* Object key sets, one for each JSON object level, to detect duplicates */
+    std::vector<std::unordered_set<std::string>> _mKeys;
+};
+
+template <typename ListenerT>
+JsonParser<ListenerT>::JsonParser(const char * const begin, const char * const end,
+                                  ListenerT& listener) :
+    _mSs {begin, end},
+    _mListener {&listener}
+{
+    BT_ASSERT(end >= begin);
+    this->_parse();
+}
+
+template <typename ListenerT>
+void JsonParser<ListenerT>::_expectVal()
+{
+    if (this->_tryParseNull()) {
+        return;
+    }
+
+    if (this->_tryParseBool()) {
+        return;
+    }
+
+    if (this->_tryParseStr()) {
+        return;
+    }
+
+    if (this->_tryParseArray()) {
+        return;
+    }
+
+    if (this->_tryParseObj()) {
+        return;
+    }
+
+    if (this->_tryParseNumber()) {
+        return;
+    }
+
+    throw TextParseError {
+        "Expecting a JSON value: `null`, `true`, `false`, a supported number "
+        "(for an integer: -9,223,372,036,854,775,808 to 18,446,744,073,709,551,615), "
+        "`\"` (a string), `[` (an array), or `{` (an object).",
+        _mSs.loc()};
+}
+
+template <typename ListenerT>
+void JsonParser<ListenerT>::_parse()
+{
+    /* Expect a single JSON value */
+    this->_expectVal();
+
+    /* Skip trailing whitespaces */
+    _mSs.skipWhitespaces();
+
+    /* Make sure all the text is consumed */
+    if (!_mSs.isDone()) {
+        throw TextParseError {"Extra data after parsed JSON value.", _mSs.loc()};
+    }
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseNull()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (_mSs.tryScanToken("null")) {
+        _mListener->onNull(loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseBool()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (_mSs.tryScanToken("true")) {
+        _mListener->onScalarVal(true, loc);
+        return true;
+    } else if (_mSs.tryScanToken("false")) {
+        _mListener->onScalarVal(false, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseNumber()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    /*
+     * The `_mSs.tryScanConstReal()` call below is somewhat expensive
+     * currently because it involves calling std::regex_search() to
+     * confirm the constant real number form.
+     *
+     * The strategy below is to:
+     *
+     * 1. Keep the current position P of the string scanner.
+     *
+     * 2. Call `_mSs.tryScanConstUInt()` and
+     *    `_mSs.tryScanConstSInt()` first.
+     *
+     *    If either one succeeds, make sure the scanned JSON number
+     *    can't be in fact a real number. If it can, then reset the
+     *    position of the string scanner to P. It's safe to reset the
+     *    string scanner position at this point because
+     *    `_mSs.skipWhitespaces()` was called above and the constant
+     *    number scanning methods won't scan a newline character.
+     *
+     * 3. Call `_mSs.tryScanConstReal()` last.
+     */
+    const auto at = _mSs.at();
+
+    if (const auto uIntVal = _mSs.tryScanConstUInt()) {
+        if (!this->_ssCurCharLikeConstRealFracOrExp()) {
+            /* Confirmed unsigned integer form */
+            _mListener->onScalarVal(*uIntVal, loc);
+            return true;
+        }
+
+        /* Looks like a constant real number: backtrack */
+        _mSs.at(at);
+    } else if (const auto sIntVal = _mSs.tryScanConstSInt()) {
+        if (!this->_ssCurCharLikeConstRealFracOrExp()) {
+            /* Confirmed signed integer form */
+            _mListener->onScalarVal(*sIntVal, loc);
+            return true;
+        }
+
+        /* Looks like a constant real number: backtrack */
+        _mSs.at(at);
+    }
+
+    if (const auto realVal = _mSs.tryScanConstReal()) {
+        _mListener->onScalarVal(*realVal, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseStr()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (const auto str = this->_tryScanLitStr()) {
+        _mListener->onScalarVal(*str, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseObjKey()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (const auto str = this->_tryScanLitStr()) {
+        /* _tryParseObj() pushes */
+        BT_ASSERT(!_mKeys.empty());
+
+        /* Insert, checking for duplicate key */
+        if (!_mKeys.back().insert(*str).second) {
+            std::ostringstream ss;
+
+            ss << "Duplicate JSON object key `" << *str << "`.";
+            throw TextParseError {ss.str(), loc};
+        }
+
+        _mListener->onObjKey(*str, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseArray()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (!_mSs.tryScanToken("[")) {
+        return false;
+    }
+
+    /* Beginning of array */
+    _mListener->onArrayBegin(loc);
+
+    if (_mSs.tryScanToken("]")) {
+        /* Empty array */
+        _mListener->onArrayEnd(loc);
+        return true;
+    }
+
+    while (true) {
+        /* Expect array element */
+        this->_expectVal();
+
+        if (!_mSs.tryScanToken(",")) {
+            /* No more array elements */
+            break;
+        }
+    }
+
+    /* End of array */
+    this->_expectToken("]");
+    _mListener->onArrayEnd(loc);
+    return true;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseObj()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (!_mSs.tryScanToken("{")) {
+        return false;
+    }
+
+    /* Beginning of object */
+    _mListener->onObjBegin(loc);
+
+    if (_mSs.tryScanToken("}")) {
+        /* Empty object */
+        _mListener->onObjEnd(loc);
+        return true;
+    }
+
+    /* New level of object keys */
+    _mKeys.push_back({});
+
+    while (true) {
+        /* Expect object key */
+        _mSs.skipWhitespaces();
+
+        if (!this->_tryParseObjKey()) {
+            throw TextParseError {"Expecting a JSON object key (double-quoted string).",
+                                  _mSs.loc()};
+        }
+
+        /* Expect colon */
+        this->_expectToken(":");
+
+        /* Expect entry value */
+        this->_expectVal();
+
+        if (!_mSs.tryScanToken(",")) {
+            /* No more entries */
+            break;
+        }
+    }
+
+    /* End of object */
+    BT_ASSERT(!_mKeys.empty());
+    _mKeys.pop_back();
+    this->_expectToken("}");
+    _mListener->onObjEnd(loc);
+    return true;
+}
+
+} /* namespace internal */
+
+/*
+ * Parses the JSON text between `begin` and `end` (excluded), calling
+ * the methods of `listener` for each JSON event (see
+ * `internal::JsonParser` for the requirements of `ListenerT`).
+ *
+ * Throws `TextParseError` on error.
+ */
+template <typename ListenerT>
+void parseJson(const char * const begin, const char * const end, ListenerT& listener)
+{
+    internal::JsonParser<ListenerT> {begin, end, listener};
+}
+
+/*
+ * Parses the null-terminated JSON text `str`, calling the methods of
+ * `listener` for each JSON event (see `internal::JsonParser` for the
+ * requirements of `ListenerT`).
+ *
+ * Throws `TextParseError` on error.
+ */
+template <typename ListenerT>
+void parseJson(const char * const str, ListenerT& listener)
+{
+    parseJson(str, str + std::strlen(str), listener);
+}
+
+/*
+ * Parses the JSON text `str`, calling the methods of `listener` for
+ * each JSON event (see `internal::JsonParser` for the requirements of
+ * `ListenerT`).
+ *
+ * Throws `TextParseError` on error.
+ */
+template <typename ListenerT>
+void parseJson(const std::string& str, ListenerT& listener)
+{
+    parseJson(str.data(), str.data() + str.size(), listener);
+}
+
+} /* namespace bt2_common */
+
+#endif /* BABELTRACE_CPP_COMMON_PARSE_JSON_HPP */