diff --git a/src/Makefile.am b/src/Makefile.am index da671fcae..54fa76cc7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -170,6 +170,7 @@ cpp_common_libcpp_common_la_SOURCES = \ cpp-common/bt2c/glib-up.hpp \ cpp-common/bt2c/libc-up.hpp \ cpp-common/bt2c/logging.hpp \ + cpp-common/bt2c/parse-json.hpp \ cpp-common/bt2c/prio-heap.hpp \ cpp-common/bt2c/read-fixed-len-int.hpp \ cpp-common/bt2c/safe-ops.hpp \ diff --git a/src/cpp-common/bt2c/parse-json.hpp b/src/cpp-common/bt2c/parse-json.hpp new file mode 100644 index 000000000..166b6d675 --- /dev/null +++ b/src/cpp-common/bt2c/parse-json.hpp @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2022 Philippe Proulx + * + * SPDX-License-Identifier: MIT + */ + +#ifndef BABELTRACE_CPP_COMMON_BT2C_PARSE_JSON_HPP +#define BABELTRACE_CPP_COMMON_BT2C_PARSE_JSON_HPP + +#include <cstring> +#include <string> +#include <unordered_set> +#include <vector> + +#include "common/assert.h" + +#include "exc.hpp" +#include "str-scanner.hpp" +#include "text-loc-str.hpp" +#include "text-loc.hpp" + +namespace bt2c { +namespace internal { + +/* + * JSON text parser. + * + * This parser parses a single JSON value, calling the methods of a JSON + * event listener of type `ListenerT` for each JSON event. + * + * The requirements of `ListenerT` are the following public methods: + * + * void onNull(const TextLoc&); + * void onScalarVal(bool, const TextLoc&); + * void onScalarVal(unsigned long long, const TextLoc&); + * void onScalarVal(long long, const TextLoc&); + * void onScalarVal(double, const TextLoc&); + * void onScalarVal(const std::string&, const TextLoc&); + * void onArrayBegin(const TextLoc&); + * void onArrayEnd(const TextLoc&); + * void onObjBegin(const TextLoc&); + * void onObjKey(const std::string&, const TextLoc&); + * void onObjEnd(const TextLoc&); + * + * The received text location always indicate the location of the + * _beginning_ of the text representing the corresponding JSON value. 
+ *
+ * This parser honours the standard JSON grammar (RFC 8259), not
+ * parsing special floating-point number tokens (`nan`, `inf`, and the
+ * rest) or C-style comments.
+ */
+template <typename ListenerT>
+class JsonParser final
+{
+public:
+    /*
+     * Builds a JSON text parser, wrapping a string between `begin`
+     * (included) and `end` (excluded), and parses it, calling the
+     * methods of the JSON event listener `listener`.
+     *
+     * Adds `baseOffset` to the text location offset for all error messages.
+     *
+     * When the JSON parser logs or appends a cause to the error of the
+     * current thread, it uses `baseOffset` and `textLocStrFmt` to
+     * format the text location part of the message.
+     */
+    explicit JsonParser(const char *begin, const char *end, ListenerT& listener,
+                        std::size_t baseOffset, const bt2c::Logger& parentLogger,
+                        TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET);
+
+private:
+    /*
+     * Parses the whole JSON string.
+     */
+    void _parse();
+
+    /*
+     * Expects a JSON value, appending a cause to the error of the
+     * current thread and throwing `Error` if not found.
+     */
+    void _expectVal();
+
+    /*
+     * Tries to parse `null`, calling the event listener on success.
+     */
+    bool _tryParseNull();
+
+    /*
+     * Tries to parse `true` or `false`, calling the event listener on
+     * success.
+     */
+    bool _tryParseBool();
+
+    /*
+     * Tries to parse a JSON number, calling the event listener on
+     * success.
+     */
+    bool _tryParseNumber();
+
+    /*
+     * Tries to parse a JSON object key, calling the event listener on
+     * success.
+     */
+    bool _tryParseObjKey();
+
+    /*
+     * Tries to parse a JSON string, calling the event listener on
+     * success.
+     */
+    bool _tryParseStr();
+
+    /*
+     * Tries to parse a JSON array, calling the event listener on
+     * success.
+     */
+    bool _tryParseArray();
+
+    /*
+     * Tries to parse a JSON object, calling the event listener on
+     * success.
+     */
+    bool _tryParseObj();
+
+    /*
+     * Expects the specific token `token`, appending a cause to the
+     * error of the current thread and throwing `Error` if not found.
+     */
+    void _expectToken(const char * const token)
+    {
+        if (!_mSs.tryScanToken(token)) {
+            BT_CPPLOGE_APPEND_CAUSE_AND_THROW(Error, "[{}] Expecting `{}`.", this->_locStr(),
+                                              token);
+        }
+    }
+
+    /*
+     * Calls StrScanner::tryScanLitStr() with the JSON-specific escape
+     * sequence starting characters.
+     */
+    const std::string *_tryScanLitStr()
+    {
+        return _mSs.tryScanLitStr("/bfnrtu");
+    }
+
+    /*
+     * Returns whether or not the underlying string scanner isn't done
+     * and its current character looks like the beginning of the
+     * fractional or exponent part of a constant real number.
+     */
+    bool _ssCurCharLikeConstRealFracOrExp() const noexcept
+    {
+        return !_mSs.isDone() && (*_mSs.at() == '.' || *_mSs.at() == 'E' || *_mSs.at() == 'e');
+    }
+
+    /*
+     * Returns the current text location of the underlying string
+     * scanner as a string, following `_mTextLocStrFmt`.
+     */
+    std::string _locStr() const
+    {
+        return textLocStr(_mSs.loc(), _mTextLocStrFmt);
+    }
+
+private:
+    /* Logging configuration */
+    Logger _mLogger;
+
+    /* Underlying string scanner */
+    StrScanner _mSs;
+
+    /* JSON event listener */
+    ListenerT *_mListener;
+
+    /* Object key sets, one for each JSON object level, to detect duplicates */
+    std::vector<std::unordered_set<std::string>> _mKeys;
+
+    /* Text location string format */
+    TextLocStrFmt _mTextLocStrFmt;
+};
+
+template <typename ListenerT>
+JsonParser<ListenerT>::JsonParser(const char * const begin, const char * const end,
+                                  ListenerT& listener, const std::size_t baseOffset,
+                                  const bt2c::Logger& parentLogger,
+                                  const TextLocStrFmt textLocStrFmt) :
+    _mLogger {parentLogger, "PARSE-JSON"},
+    _mSs {begin, end, baseOffset, parentLogger, textLocStrFmt}, _mListener {&listener},
+    _mTextLocStrFmt {textLocStrFmt}
+{
+    BT_ASSERT(end >= begin);
+    this->_parse();
+}
+
+template <typename ListenerT>
+void JsonParser<ListenerT>::_expectVal()
+{
+    if (this->_tryParseNull()) {
+        return;
+    }
+
+    if (this->_tryParseBool()) {
+        return;
+    }
+
+    if (this->_tryParseStr()) {
+        return;
+    }
+
+    if (this->_tryParseArray()) {
+        return;
+    }
+
+    if (this->_tryParseObj()) {
+        return;
+    }
+
+    if (this->_tryParseNumber()) {
+        return;
+    }
+
+    BT_CPPLOGE_APPEND_CAUSE_AND_THROW(
+        Error,
+        "[{}] Expecting a JSON value: `null`, `true`, `false`, a supported number "
+        "(for an integer: -9,223,372,036,854,775,808 to 18,446,744,073,709,551,615), "
+        "`\"` (a string), `[` (an array), or `{` (an object).",
+        this->_locStr());
+}
+
+template <typename ListenerT>
+void JsonParser<ListenerT>::_parse()
+{
+    /* Expect a single JSON value */
+    this->_expectVal();
+
+    /* Skip trailing whitespaces */
+    _mSs.skipWhitespaces();
+
+    /* Make sure all the text is consumed */
+    if (!_mSs.isDone()) {
+        BT_CPPLOGE_APPEND_CAUSE_AND_THROW(Error, "[{}] Extra data after parsed JSON value.",
+                                          this->_locStr());
+    }
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseNull()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (_mSs.tryScanToken("null")) {
+        _mListener->onNull(loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseBool()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (_mSs.tryScanToken("true")) {
+        _mListener->onScalarVal(true, loc);
+        return true;
+    } else if (_mSs.tryScanToken("false")) {
+        _mListener->onScalarVal(false, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseNumber()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    /*
+     * The `_mSs.tryScanConstReal()` call below is somewhat expensive
+     * currently because it involves calling std::regex_search() to
+     * confirm the constant real number form.
+     *
+     * The strategy below is to:
+     *
+     * 1. Keep the current position P of the string scanner.
+     *
+     * 2. Call `_mSs.tryScanConstUInt()` and
+     *    `_mSs.tryScanConstSInt()` first.
+     *
+     *    If either one succeeds, make sure the scanned JSON number
+     *    can't be in fact a real number. If it can, then reset the
+     *    position of the string scanner to P. It's safe to reset the
+     *    string scanner position at this point because
+     *    `_mSs.skipWhitespaces()` was called above and the constant
+     *    number scanning methods won't scan a newline character.
+     *
+     * 3. Call `_mSs.tryScanConstReal()` last.
+     */
+    const auto at = _mSs.at();
+
+    if (const auto uIntVal = _mSs.tryScanConstUInt()) {
+        if (!this->_ssCurCharLikeConstRealFracOrExp()) {
+            /* Confirmed unsigned integer form */
+            _mListener->onScalarVal(*uIntVal, loc);
+            return true;
+        }
+
+        /* Looks like a constant real number: backtrack */
+        _mSs.at(at);
+    } else if (const auto sIntVal = _mSs.tryScanConstSInt()) {
+        if (!this->_ssCurCharLikeConstRealFracOrExp()) {
+            /* Confirmed signed integer form */
+            _mListener->onScalarVal(*sIntVal, loc);
+            return true;
+        }
+
+        /* Looks like a constant real number: backtrack */
+        _mSs.at(at);
+    }
+
+    if (const auto realVal = _mSs.tryScanConstReal()) {
+        _mListener->onScalarVal(*realVal, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseStr()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (const auto str = this->_tryScanLitStr()) {
+        _mListener->onScalarVal(*str, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseObjKey()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (const auto str = this->_tryScanLitStr()) {
+        /* _tryParseObj() pushes */
+        BT_ASSERT(!_mKeys.empty());
+
+        /* Insert, checking for duplicate key */
+        if (!_mKeys.back().insert(*str).second) {
+            BT_CPPLOGE_APPEND_CAUSE_AND_THROW(Error, "[{}] Duplicate JSON object key `{}`.",
+                                              this->_locStr(), *str);
+        }
+
+        _mListener->onObjKey(*str, loc);
+        return true;
+    }
+
+    return false;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseArray()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (!_mSs.tryScanToken("[")) {
+        return false;
+    }
+
+    /* Beginning of array */
+    _mListener->onArrayBegin(loc);
+
+    if (_mSs.tryScanToken("]")) {
+        /* Empty array */
+        _mListener->onArrayEnd(loc);
+        return true;
+    }
+
+    while (true) {
+        /* Expect array element */
+        this->_expectVal();
+
+        if (!_mSs.tryScanToken(",")) {
+            /* No more array elements */
+            break;
+        }
+    }
+
+    /* End of array */
+    this->_expectToken("]");
+    _mListener->onArrayEnd(loc);
+    return true;
+}
+
+template <typename ListenerT>
+bool JsonParser<ListenerT>::_tryParseObj()
+{
+    _mSs.skipWhitespaces();
+
+    const auto loc = _mSs.loc();
+
+    if (!_mSs.tryScanToken("{")) {
+        return false;
+    }
+
+    /* Beginning of object */
+    _mListener->onObjBegin(loc);
+
+    if (_mSs.tryScanToken("}")) {
+        /* Empty object */
+        _mListener->onObjEnd(loc);
+        return true;
+    }
+
+    /* New level of object keys */
+    _mKeys.push_back({});
+
+    while (true) {
+        /* Expect object key */
+        _mSs.skipWhitespaces();
+
+        if (!this->_tryParseObjKey()) {
+            BT_CPPLOGE_APPEND_CAUSE_AND_THROW(
+                Error, "[{}] Expecting a JSON object key (double-quoted string).", this->_locStr());
+        }
+
+        /* Expect colon */
+        this->_expectToken(":");
+
+        /* Expect entry value */
+        this->_expectVal();
+
+        if (!_mSs.tryScanToken(",")) {
+            /* No more entries */
+            break;
+        }
+    }
+
+    /* End of object */
+    BT_ASSERT(!_mKeys.empty());
+    _mKeys.pop_back();
+    this->_expectToken("}");
+    _mListener->onObjEnd(loc);
+    return true;
+}
+
+} /* namespace internal */
+
+/*
+ * Parses the JSON text between `begin` and `end` (excluded), calling
+ * the methods of `listener` for each JSON event (see
+ * `internal::JsonParser` for the requirements of `ListenerT`).
+ *
+ * When the function logs or appends a cause to the error of the current
+ * thread, it uses `baseOffset` and `textLocStrFmt` to format the text
+ * location part of the message.
+ */
+template <typename ListenerT>
+void parseJson(const char * const begin, const char * const end, ListenerT& listener,
+               const std::size_t baseOffset, const bt2c::Logger& parentLogger,
+               const TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET)
+{
+    internal::JsonParser<ListenerT> {begin, end, listener, baseOffset, parentLogger, textLocStrFmt};
+}
+
+template <typename ListenerT>
+void parseJson(const char * const begin, const char * const end, ListenerT& listener,
+               const bt2c::Logger& parentLogger,
+               const TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET)
+{
+    parseJson(begin, end, listener, 0, parentLogger, textLocStrFmt);
+}
+
+/*
+ * Parses the null-terminated JSON text `str`, calling the methods of
+ * `listener` for each JSON event (see `internal::JsonParser` for the
+ * requirements of `ListenerT`).
+ *
+ * When the function logs or appends a cause to the error of the current
+ * thread, it uses `baseOffset` and `textLocStrFmt` to format the text
+ * location part of the message.
+ */
+template <typename ListenerT>
+void parseJson(const char * const str, ListenerT& listener, const std::size_t baseOffset,
+               const bt2c::Logger& parentLogger,
+               const TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET)
+{
+    parseJson(str, str + std::strlen(str), listener, baseOffset, parentLogger, textLocStrFmt);
+}
+
+template <typename ListenerT>
+void parseJson(const char * const str, ListenerT& listener, const bt2c::Logger& parentLogger,
+               const TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET)
+{
+    parseJson(str, listener, 0, parentLogger, textLocStrFmt);
+}
+
+/*
+ * Parses the JSON text `str`, calling the methods of `listener` for
+ * each JSON event (see `internal::JsonParser` for the requirements of
+ * `ListenerT`).
+ *
+ * When the function logs or appends a cause to the error of the current
+ * thread, it uses `baseOffset` and `textLocStrFmt` to format the text
+ * location part of the message.
+ */
+template <typename ListenerT>
+void parseJson(const std::string& str, ListenerT& listener, const std::size_t baseOffset,
+               const bt2c::Logger& parentLogger,
+               const TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET)
+{
+    /* Delegate to the `begin`/`end` overload over the bytes of `str` */
+    parseJson(str.data(), str.data() + str.size(), listener, baseOffset, parentLogger, textLocStrFmt);
+}
+
+template <typename ListenerT>
+void parseJson(const std::string& str, ListenerT& listener, const bt2c::Logger& parentLogger,
+               const TextLocStrFmt textLocStrFmt = TextLocStrFmt::LINE_COL_NOS_AND_OFFSET)
+{
+    parseJson(str, listener, 0, parentLogger, textLocStrFmt);
+}
+
+} /* namespace bt2c */
+
+#endif /* BABELTRACE_CPP_COMMON_BT2C_PARSE_JSON_HPP */