From db06dab6abb2fc41435f23fca40fe15a87c5d38a Mon Sep 17 00:00:00 2001 From: barcode Date: Fri, 23 Dec 2022 14:32:04 +0100 Subject: [PATCH] Use nlohmann::position_t instead of lexer for detailed position information when using a sax parser --- .../sax_parse_with_src_location_in_json.cpp | 17 ++-- .../docs/api/json_sax/next_token_end.md | 25 +++--- .../docs/api/json_sax/next_token_start.md | 25 +++--- .../api/position_t/chars_read_current_line.md | 28 +++++++ .../docs/api/position_t/chars_read_total.md | 28 +++++++ docs/mkdocs/docs/api/position_t/index.md | 23 ++++++ docs/mkdocs/docs/api/position_t/lines_read.md | 28 +++++++ .../docs/api/position_t/operator_size_t.md | 28 +++++++ .../docs/features/parsing/sax_interface.md | 24 ++++++ docs/mkdocs/mkdocs.yml | 8 ++ include/nlohmann/detail/input/position_t.hpp | 5 -- include/nlohmann/detail/meta/is_sax.hpp | 72 +++++++++++------ single_include/nlohmann/json.hpp | 77 ++++++++++++------- tests/src/unit-sax-parser-extended.cpp | 52 ++++++------- .../unit-sax-parser-store-source-location.cpp | 34 ++++---- 15 files changed, 330 insertions(+), 144 deletions(-) create mode 100644 docs/mkdocs/docs/api/position_t/chars_read_current_line.md create mode 100644 docs/mkdocs/docs/api/position_t/chars_read_total.md create mode 100644 docs/mkdocs/docs/api/position_t/index.md create mode 100644 docs/mkdocs/docs/api/position_t/lines_read.md create mode 100644 docs/mkdocs/docs/api/position_t/operator_size_t.md diff --git a/docs/examples/sax_parse_with_src_location_in_json.cpp b/docs/examples/sax_parse_with_src_location_in_json.cpp index ab9b30cc58..cf7adc1fbd 100644 --- a/docs/examples/sax_parse_with_src_location_in_json.cpp +++ b/docs/examples/sax_parse_with_src_location_in_json.cpp @@ -9,14 +9,13 @@ using json = nlohmann::json; // allows us to store metadata and add custom methods to each node struct token_start_stop { - nlohmann::detail::position_t start{}; - nlohmann::detail::position_t stop{}; + nlohmann::position_t start{}; + 
nlohmann::position_t stop{}; std::string start_pos_str() const { return "{l=" + std::to_string(start.lines_read) + ":c=" - //the lexer is already one char ahead (e.g. the opening { of an object ) - + std::to_string(start.chars_read_current_line - 1) + "}"; + + std::to_string(start.chars_read_current_line) + "}"; } std::string stop_pos_str() const { @@ -68,16 +67,14 @@ class sax_with_token_start_stop_metadata , start_stop{} {} - template - void next_token_start(const nlohmann::detail::lexer& lex) + void next_token_start(const nlohmann::position_t& p) { - start_stop.start = lex.get_position(); + start_stop.start = p; } - template - void next_token_end(const nlohmann::detail::lexer& lex) + void next_token_end(const nlohmann::position_t& p) { - start_stop.stop = lex.get_position(); + start_stop.stop = p; } bool null() diff --git a/docs/mkdocs/docs/api/json_sax/next_token_end.md b/docs/mkdocs/docs/api/json_sax/next_token_end.md index 25f9fd4bd7..69e5da9656 100644 --- a/docs/mkdocs/docs/api/json_sax/next_token_end.md +++ b/docs/mkdocs/docs/api/json_sax/next_token_end.md @@ -7,31 +7,23 @@ There are two possible signatures for this method: ```cpp void next_token_end(std::size_t pos); ``` -This version is called with the byte position after the next element ends. This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md). +This version is called with the byte position after the next element ends. +This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md). 2. ```cpp -template -void next_token_end(const nlohmann::detail::lexer& lex) +void next_token_end(const nlohmann::position_t& p) ``` -This version is called with the lexer after the last character of the next element was parsed. The lexer can provide additional information about the current parse context. 
This version only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence. - -## Template parameters -1. -(none) -2. -`BasicJsonType` -: a specialization of `basic_json` used by the lexer. (Leave this as a template parameter) -`InputAdapterType` -: The input adapter used by the lexer. (Leave this as a template parameter) +This version is called with the [detailed parser position information](../position_t/index.md) after the last character of the next element was parsed. +This version is only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence. ## Parameters 1. `pos` (in) : Byte position one after the next elements last byte. 2. -`lex` (in) -: Lexer after the last char of the next element was parsed. +`p` (in) +: [Detailed parser position information](../position_t/index.md) after the last char of the next element was parsed. ## Notes @@ -57,7 +49,8 @@ It is recommended, but not required, to also implement [next_token_start](next_t ??? example - The example below shows a SAX parser using the second version of this method and storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. + The example below shows a SAX parser using the second version of this method and + storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. 
```cpp --8<-- "examples/sax_parse_with_src_location_in_json.cpp" diff --git a/docs/mkdocs/docs/api/json_sax/next_token_start.md b/docs/mkdocs/docs/api/json_sax/next_token_start.md index 9543f53ad6..49289b7ac5 100644 --- a/docs/mkdocs/docs/api/json_sax/next_token_start.md +++ b/docs/mkdocs/docs/api/json_sax/next_token_start.md @@ -7,31 +7,23 @@ There are two possible signatures for this method: ```cpp void next_token_start(std::size_t pos); ``` -This version is called with the byte position where the next element starts. This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md). +This version is called with the byte position where the next element starts. +This version also works when parsing binary formats such as [msgpack](../basic_json/input_format_t.md). 2. ```cpp -template -void next_token_start(const nlohmann::detail::lexer& lex) +void next_token_start(const nlohmann::position_t& p) ``` -This version is called with the lexer after the first character of the next element was parsed. The lexer can provide additional information about the current parse context. This version only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence. - -## Template parameters -1. -(none) -2. -`BasicJsonType` -: a specialization of `basic_json` used by the lexer. (Leave this as a template parameter) -`InputAdapterType` -: The input adapter used by the lexer. (Leave this as a template parameter) +This version is called with [detailed parser position information](../position_t/index.md). +This version is only available when calling `nlohmann::json::sax_parse` with `nlohmann::json::input_format_t::json` and takes precedence. ## Parameters 1. `pos` (in) : Byte position where the next element starts. 2. -`lex` (in) -: Lexer after the first char of the next element was parsed. 
+`p` (in) +: [Detailed parser position information](../position_t/index.md) of the position where the next element starts (i.e. before its first char is read). ## Notes @@ -57,7 +49,8 @@ It is recommended, but not required, to also implement [next_token_end](next_tok ??? example - The example below shows a SAX parser using the second version of this method and storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. + The example below shows a SAX parser using the second version of this method and + storing the location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. ```cpp --8<-- "examples/sax_parse_with_src_location_in_json.cpp" diff --git a/docs/mkdocs/docs/api/position_t/chars_read_current_line.md b/docs/mkdocs/docs/api/position_t/chars_read_current_line.md new file mode 100644 index 0000000000..740a3875d3 --- /dev/null +++ b/docs/mkdocs/docs/api/position_t/chars_read_current_line.md @@ -0,0 +1,28 @@ +# nlohmann::position_t::chars_read_current_line + +```cpp +std::size_t chars_read_current_line; +``` + +The number of characters read in the current line. + +## Examples + +??? example + + The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and + storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. + + ```cpp + --8<-- "examples/sax_parse_with_src_location_in_json.cpp" + ``` + + Output: + + ```json + --8<-- "examples/sax_parse_with_src_location_in_json.output" + ``` + +## Version history + +- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???. 
diff --git a/docs/mkdocs/docs/api/position_t/chars_read_total.md b/docs/mkdocs/docs/api/position_t/chars_read_total.md new file mode 100644 index 0000000000..9f6e736cf2 --- /dev/null +++ b/docs/mkdocs/docs/api/position_t/chars_read_total.md @@ -0,0 +1,28 @@ +# nlohmann::position_t::chars_read_total + +```cpp +std::size_t chars_read_total; +``` + +The total number of characters read. + +## Examples + +??? example + + The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and + storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. + + ```cpp + --8<-- "examples/sax_parse_with_src_location_in_json.cpp" + ``` + + Output: + + ```json + --8<-- "examples/sax_parse_with_src_location_in_json.output" + ``` + +## Version history + +- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???. diff --git a/docs/mkdocs/docs/api/position_t/index.md b/docs/mkdocs/docs/api/position_t/index.md new file mode 100644 index 0000000000..16c4fd4312 --- /dev/null +++ b/docs/mkdocs/docs/api/position_t/index.md @@ -0,0 +1,23 @@ +# nlohmann::position_t + +```cpp +struct position_t; +``` + +This type represents the parser's position while parsing a JSON string. +This position can be retrieved when using a [sax parser](../json_sax/index.md) with the format `nlohmann::json::input_format_t::json` +and implementing [next_token_start](../json_sax/next_token_start.md) or [next_token_end](../json_sax/next_token_end.md). + +## Member functions + +- [**operator size_t**](operator_size_t.md) - returns the value of [chars_read_total](chars_read_total.md). + +## Member variables + +- [**chars_read_total**](chars_read_total.md) - The total number of characters read. +- [**lines_read**](lines_read.md) - The number of lines read. +- [**chars_read_current_line**](chars_read_current_line.md) - The number of characters read in the current line. 
+ +## Version history + +- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???. diff --git a/docs/mkdocs/docs/api/position_t/lines_read.md b/docs/mkdocs/docs/api/position_t/lines_read.md new file mode 100644 index 0000000000..e22ee1d45a --- /dev/null +++ b/docs/mkdocs/docs/api/position_t/lines_read.md @@ -0,0 +1,28 @@ +# nlohmann::position_t::lines_read + +```cpp +std::size_t lines_read; +``` + +The number of lines read. + +## Examples + +??? example + + The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and + storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. + + ```cpp + --8<-- "examples/sax_parse_with_src_location_in_json.cpp" + ``` + + Output: + + ```json + --8<-- "examples/sax_parse_with_src_location_in_json.output" + ``` + +## Version history + +- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???. diff --git a/docs/mkdocs/docs/api/position_t/operator_size_t.md b/docs/mkdocs/docs/api/position_t/operator_size_t.md new file mode 100644 index 0000000000..bc0325fd45 --- /dev/null +++ b/docs/mkdocs/docs/api/position_t/operator_size_t.md @@ -0,0 +1,28 @@ +# nlohmann::position_t::operator size_t + +```cpp +constexpr operator size_t() const; +``` + +Returns the value of [chars_read_total](chars_read_total.md). + +## Examples + +??? example + + The example below shows a SAX parser receiving the element bounds as `nlohmann::position_t` and + storing this location information in each json node using a [base class](../basic_json/json_base_class_t.md) for `nlohmann::json` as customization point. + + ```cpp + --8<-- "examples/sax_parse_with_src_location_in_json.cpp" + ``` + + Output: + + ```json + --8<-- "examples/sax_parse_with_src_location_in_json.output" + ``` + +## Version history + +- Moved from namespace `nlohmann::detail` to `nlohmann` in version ???.???.???. 
diff --git a/docs/mkdocs/docs/features/parsing/sax_interface.md b/docs/mkdocs/docs/features/parsing/sax_interface.md index 0796a55f52..e925d07c00 100644 --- a/docs/mkdocs/docs/features/parsing/sax_interface.md +++ b/docs/mkdocs/docs/features/parsing/sax_interface.md @@ -67,6 +67,30 @@ To implement your own SAX handler, proceed as follows: Note the `sax_parse` function only returns a `#!cpp bool` indicating the result of the last executed SAX event. It does not return `json` value - it is up to you to decide what to do with the SAX events. Furthermore, no exceptions are thrown in case of a parse error - it is up to you what to do with the exception object passed to your `parse_error` implementation. Internally, the SAX interface is used for the DOM parser (class `json_sax_dom_parser`) as well as the acceptor (`json_sax_acceptor`), see file `json_sax.hpp`. +## Element position information + +The position of a parsed element can be retrieved by implementing the optional methods [next_token_start](../../api/json_sax/next_token_start.md) and [next_token_end](../../api/json_sax/next_token_end.md). +These methods will be called with the parser position before any of the other methods are called and can be used to retrieve the half-open bounds (`[start, end)`) of a parsed element. + +These methods come in two flavors: + +1. +```cpp +void next_token_start(std::size_t pos); +void next_token_end(std::size_t pos); +``` +This flavor is called with the byte positions of each element and is available for any `nlohmann::json::input_format_t` passed to `nlohmann::json::sax_parse`. + +2. +```cpp +void next_token_start(const nlohmann::position_t& p); +void next_token_end(const nlohmann::position_t& p); +``` +This flavor is called with the [detailed parser position information](../../api/position_t/index.md) of each element and is only available if `nlohmann::json::sax_parse` is called with `nlohmann::json::input_format_t::json`. 
+Furthermore this flavor takes precedence over the first flavor. + +Depending on the required information it is possible for the SAX parser to implement all four or only one or none of these methods. + ## See also - [json_sax](../../api/json_sax/index.md) - documentation of the SAX interface diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index f292220338..fcb3624f2b 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -248,6 +248,8 @@ nav: - 'start_array': api/json_sax/start_array.md - 'start_object': api/json_sax/start_object.md - 'string': api/json_sax/string.md + - 'next_token_start' : api/json_sax/next_token_start.md + - 'next_token_end' : api/json_sax/next_token_end.md - 'operator<<(basic_json)': api/operator_ltlt.md - 'operator<<(json_pointer)': api/operator_ltlt.md - 'operator>>(basic_json)': api/operator_gtgt.md @@ -255,6 +257,12 @@ nav: - 'operator""_json_pointer': api/operator_literal_json_pointer.md - 'ordered_json': api/ordered_json.md - 'ordered_map': api/ordered_map.md + - position_t: + - 'Overview': api/position_t/index.md + - 'operator size_t': api/position_t/operator_size_t.md + - 'chars_read_total': api/position_t/chars_read_total.md + - 'lines_read': api/position_t/lines_read.md + - 'chars_read_current_line': api/position_t/chars_read_current_line.md - macros: - 'Overview': api/macros/index.md - 'JSON_ASSERT': api/macros/json_assert.md diff --git a/include/nlohmann/detail/input/position_t.hpp b/include/nlohmann/detail/input/position_t.hpp index 396db0e16b..5450ee9615 100644 --- a/include/nlohmann/detail/input/position_t.hpp +++ b/include/nlohmann/detail/input/position_t.hpp @@ -13,9 +13,6 @@ #include NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - /// struct to capture the start position of the current token struct position_t { @@ -32,6 +29,4 @@ struct position_t return chars_read_total; } }; - -} // namespace detail NLOHMANN_JSON_NAMESPACE_END diff --git a/include/nlohmann/detail/meta/is_sax.hpp 
b/include/nlohmann/detail/meta/is_sax.hpp index fd05864340..6e8266f3f6 100644 --- a/include/nlohmann/detail/meta/is_sax.hpp +++ b/include/nlohmann/detail/meta/is_sax.hpp @@ -50,30 +50,30 @@ struct sax_call_next_token_end_pos_direct template struct sax_call_function { - // is the parameter a lexer or a position - static constexpr bool no_lexer = std::is_same::value; + // is the parameter a lexer or a byte position + static constexpr bool called_with_byte_pos = std::is_same::value; template using call_t = decltype(DirectCaller::call(std::declval(), std::declval()...)); //the sax parser supports calls with a position - static constexpr bool detected_call_with_pos = + static constexpr bool detected_call_with_byte_pos = is_detected_exact::value; //the sax parser supports calls with a lexer - static constexpr bool detected_call_with_lex = - !no_lexer && - is_detected_exact::value; + static constexpr bool detected_call_with_lex_pos = + !called_with_byte_pos && + is_detected_exact::value; //there either has to be a version accepting a lexer or a position - static constexpr bool valid = detected_call_with_pos || detected_call_with_lex; + static constexpr bool valid = detected_call_with_byte_pos || detected_call_with_lex_pos; - //called with pos and pos is method supported -> pass data on + //called with byte pos and byte pos is method supported -> pass data on template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - sax_call_function::detected_call_with_pos + valid && + detected_call_with_byte_pos >::type call(SaxT* sax, std::size_t pos) { @@ -84,46 +84,70 @@ struct sax_call_function template static typename std::enable_if < std::is_same::value && - !sax_call_function::valid + !valid >::type call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {} - //called with lex and lex method is supported -> pass data on + //called with lex and lex pos method is supported -> call with position from lexer + // the start pos in the lexer is last read 
char -> chars_read_total-1 + template + static typename std::enable_if < + std::is_same::value && + valid && + !called_with_byte_pos && + detected_call_with_lex_pos && + std::is_same::value + >::type + call(SaxT* sax, const LexOrPos& lex) + { + JSON_ASSERT(lex.get_position().chars_read_total > 0); + JSON_ASSERT(lex.get_position().chars_read_current_line > 0); + //the lexer has already read the first char of the current element -> fix this + auto pos_copy = lex.get_position(); + --pos_copy.chars_read_total; + --pos_copy.chars_read_current_line; + DirectCaller::call(sax, pos_copy); + } + + //called with lex and lex pos method is supported -> pass data on + // the one past end pos in the lexer is the current index -> chars_read_total template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - !sax_call_function::no_lexer && - sax_call_function::detected_call_with_lex + valid && + !called_with_byte_pos && + detected_call_with_lex_pos && + std::is_same::value >::type call(SaxT* sax, const LexOrPos& lex) { - DirectCaller::call(sax, lex); + DirectCaller::call(sax, lex.get_position()); } - // called with lex and only pos method is supported -> call with position from lexer + // called with lex and only byte pos method is supported -> call with byte position from lexer // the start pos in the lexer is last read char -> chars_read_total-1 template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - !sax_call_function::no_lexer && - !sax_call_function::detected_call_with_lex && + valid && + !called_with_byte_pos && + !detected_call_with_lex_pos && std::is_same::value >::type call(SaxT* sax, const LexOrPos& lex) { + JSON_ASSERT(lex.get_position().chars_read_total > 0); DirectCaller::call(sax, lex.get_position().chars_read_total - 1); } - // called with lex and only pos method is supported -> call with position from lexer + // called with lex and only byte pos method is supported -> call with byte position 
from lexer // the one past end pos in the lexer is the current index -> chars_read_total template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - !sax_call_function::no_lexer && - !sax_call_function::detected_call_with_lex && + valid && + !called_with_byte_pos && + !detected_call_with_lex_pos && std::is_same::value >::type call(SaxT* sax, const LexOrPos& lex) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index ae7aacbccd..b73d84e7ed 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3015,9 +3015,6 @@ NLOHMANN_JSON_NAMESPACE_END NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - /// struct to capture the start position of the current token struct position_t { @@ -3034,8 +3031,6 @@ struct position_t return chars_read_total; } }; - -} // namespace detail NLOHMANN_JSON_NAMESPACE_END // #include @@ -9002,30 +8997,30 @@ struct sax_call_next_token_end_pos_direct template struct sax_call_function { - // is the parameter a lexer or a position - static constexpr bool no_lexer = std::is_same::value; + // is the parameter a lexer or a byte position + static constexpr bool called_with_byte_pos = std::is_same::value; template using call_t = decltype(DirectCaller::call(std::declval(), std::declval()...)); //the sax parser supports calls with a position - static constexpr bool detected_call_with_pos = + static constexpr bool detected_call_with_byte_pos = is_detected_exact::value; //the sax parser supports calls with a lexer - static constexpr bool detected_call_with_lex = - !no_lexer && - is_detected_exact::value; + static constexpr bool detected_call_with_lex_pos = + !called_with_byte_pos && + is_detected_exact::value; //there either has to be a version accepting a lexer or a position - static constexpr bool valid = detected_call_with_pos || detected_call_with_lex; + static constexpr bool valid = detected_call_with_byte_pos || detected_call_with_lex_pos; - //called 
with pos and pos is method supported -> pass data on + //called with byte pos and byte pos is method supported -> pass data on template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - sax_call_function::detected_call_with_pos + valid && + detected_call_with_byte_pos >::type call(SaxT* sax, std::size_t pos) { @@ -9036,46 +9031,70 @@ struct sax_call_function template static typename std::enable_if < std::is_same::value && - !sax_call_function::valid + !valid >::type call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {} - //called with lex and lex method is supported -> pass data on + //called with lex and lex pos method is supported -> call with position from lexer + // the start pos in the lexer is last read char -> chars_read_total-1 template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - !sax_call_function::no_lexer && - sax_call_function::detected_call_with_lex + valid && + !called_with_byte_pos && + detected_call_with_lex_pos && + std::is_same::value + >::type + call(SaxT* sax, const LexOrPos& lex) + { + JSON_ASSERT(lex.get_position().chars_read_total > 0); + JSON_ASSERT(lex.get_position().chars_read_current_line > 0); + //the lexer has already read the first char of the current element -> fix this + auto pos_copy = lex.get_position(); + --pos_copy.chars_read_total; + --pos_copy.chars_read_current_line; + DirectCaller::call(sax, pos_copy); + } + + //called with lex and lex pos method is supported -> pass data on + // the one past end pos in the lexer is the current index -> chars_read_total + template + static typename std::enable_if < + std::is_same::value && + valid && + !called_with_byte_pos && + detected_call_with_lex_pos && + std::is_same::value >::type call(SaxT* sax, const LexOrPos& lex) { - DirectCaller::call(sax, lex); + DirectCaller::call(sax, lex.get_position()); } - // called with lex and only pos method is supported -> call with position from lexer + // called with lex 
and only byte pos method is supported -> call with byte position from lexer // the start pos in the lexer is last read char -> chars_read_total-1 template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - !sax_call_function::no_lexer && - !sax_call_function::detected_call_with_lex && + valid && + !called_with_byte_pos && + !detected_call_with_lex_pos && std::is_same::value >::type call(SaxT* sax, const LexOrPos& lex) { + JSON_ASSERT(lex.get_position().chars_read_total > 0); DirectCaller::call(sax, lex.get_position().chars_read_total - 1); } - // called with lex and only pos method is supported -> call with position from lexer + // called with lex and only byte pos method is supported -> call with byte position from lexer // the one past end pos in the lexer is the current index -> chars_read_total template static typename std::enable_if < - sax_call_function::valid && std::is_same::value && - !sax_call_function::no_lexer && - !sax_call_function::detected_call_with_lex && + valid && + !called_with_byte_pos && + !detected_call_with_lex_pos && std::is_same::value >::type call(SaxT* sax, const LexOrPos& lex) diff --git a/tests/src/unit-sax-parser-extended.cpp b/tests/src/unit-sax-parser-extended.cpp index 08e33941b9..88342965d8 100644 --- a/tests/src/unit-sax-parser-extended.cpp +++ b/tests/src/unit-sax-parser-extended.cpp @@ -109,10 +109,10 @@ std::ostream& operator<<(std::ostream& out, const std::set& v) return out; } -template +template struct Sax { - static constexpr bool has_callback = WithPos || (WithLex && !LexCallImpossible); + static constexpr bool has_callback = WithBytePos || (WithLexPos && !LexCallImpossible); using json = nlohmann::json; enum class last_call_t @@ -167,32 +167,32 @@ struct Sax last_call = last_call_t::end_pos; } - template + template typename std::enable_if::type next_token_start(std::size_t pos) { check_start(pos); - CHECK((!WithLex || LexCallImpossible)); + CHECK((!WithLexPos || LexCallImpossible)); } 
- template < class LexT, bool Act = WithLex && !std::is_same::value > - typename std::enable_if::type next_token_start(const LexT& lex) + template < bool Act = WithLexPos > + typename std::enable_if::type next_token_start(const nlohmann::position_t& p) { - check_start(lex.get_position().chars_read_total - 1); - CHECK(WithLex); + check_start(p.chars_read_total); + CHECK(WithLexPos); } - template + template typename std::enable_if::type next_token_end(std::size_t pos) { check_end(pos); - CHECK((!WithLex || LexCallImpossible)); + CHECK((!WithLexPos || LexCallImpossible)); } - template < class LexT, bool Act = WithLex && !std::is_same::value > - typename std::enable_if::type next_token_end(const LexT& lex) + template < bool Act = WithLexPos > + typename std::enable_if::type next_token_end(const nlohmann::position_t& p) { - check_end(lex.get_position().chars_read_total); - CHECK(WithLex); + check_end(p.chars_read_total); + CHECK(WithLexPos); } bool null() @@ -303,11 +303,11 @@ struct Sax } }; -template +template struct Opt { - static constexpr bool WithPos = WithPosV; - static constexpr bool WithLex = WithLexV; + static constexpr bool WithBytePos = WithBytePosV; + static constexpr bool WithLexPos = WithLexPosV; }; using OptNone = Opt; @@ -318,10 +318,10 @@ using OptBoth = Opt; //test basic functionality TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) { - const bool with_pos = T::WithPos; - const bool with_lex = T::WithLex; + const bool with_pos = T::WithBytePos; + const bool with_lex = T::WithLexPos; - INFO("WithPos " << with_pos << ", WithLex " << with_lex); + INFO("WithBytePos " << with_pos << ", WithLexPos " << with_lex); //element count 0 1 2 3 4 5 6 7 8 9 10 //index 10s place 0 1 2 3 4 5 //index 1s place 012345678901234567890123456789012345678901234567890123 @@ -351,7 +351,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) reconstructed += s; skip(s.size()); }; - Sax sax; + Sax sax; 
sax.pos_start_object.emplace(elementFromStr("{")); skipFromStr(" "); sax.pos_key.emplace(elementFromStr(R"("array")")); @@ -384,7 +384,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) { const auto j = nlohmann::json::parse(str); const auto bin = nlohmann::json::to_bson(j); - Sax sax; + Sax sax; sax.pos_start_object.emplace(element(4)); //4 bytes size skip(1); //one byte type array sax.pos_key.emplace(element(6)); //6 key (array\0) @@ -414,7 +414,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) { const auto j = nlohmann::json::parse(str); const auto bin = nlohmann::json::to_cbor(j); - Sax sax; + Sax sax; sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) sax.pos_key.emplace(element(6)); //1 byte type + 5 bytes string (array) (size implicit) sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) @@ -437,7 +437,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) { const auto j = nlohmann::json::parse(str); const auto bin = nlohmann::json::to_msgpack(j); - Sax sax; + Sax sax; sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size sax.pos_key.emplace(element(6)); //1 byte type + 5 bytes string (array) (size implicit) sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) @@ -460,7 +460,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) { const auto j = nlohmann::json::parse(str); const auto bin = nlohmann::json::to_ubjson(j); - Sax sax; + Sax sax; sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size sax.pos_key.emplace(element(7)); //1 byte type + 6 bytes string (array\0) sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) @@ -483,7 +483,7 @@ TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) { const auto j = nlohmann::json::parse(str); const auto bin = 
nlohmann::json::to_bjdata(j); - Sax sax; + Sax sax; sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size sax.pos_key.emplace(element(7)); //1 byte type + 6 bytes string (array\0) sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) diff --git a/tests/src/unit-sax-parser-store-source-location.cpp b/tests/src/unit-sax-parser-store-source-location.cpp index 0820a81ce2..4a069c38b0 100644 --- a/tests/src/unit-sax-parser-store-source-location.cpp +++ b/tests/src/unit-sax-parser-store-source-location.cpp @@ -35,17 +35,17 @@ SOFTWARE. #include //prototype to make -Wmissing-prototypes happy -std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p); +std::ostream& operator<<(std::ostream& out, const nlohmann::position_t& p); //test json parser with detailed line / col information as metadata struct token_start_stop { - nlohmann::detail::position_t start{}; - nlohmann::detail::position_t stop{}; + nlohmann::position_t start{}; + nlohmann::position_t stop{}; }; -std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p) +std::ostream& operator<<(std::ostream& out, const nlohmann::position_t& p) { out << p.chars_read_total << '(' << p.lines_read << ':' << p.chars_read_current_line << ')'; return out; @@ -90,16 +90,14 @@ class sax_with_token_start_stop_metadata , start_stop{} {} - template - void next_token_start(const nlohmann::detail::lexer& lex) + void next_token_start(const nlohmann::position_t& p) { - start_stop.start = lex.get_position(); + start_stop.start = p; } - template - void next_token_end(const nlohmann::detail::lexer& lex) + void next_token_end(const nlohmann::position_t& p) { - start_stop.stop = lex.get_position(); + start_stop.stop = p; } bool null() @@ -294,38 +292,38 @@ TEST_CASE("parse-json-with-position-info") sax_with_token_start_stop_metadata sax{j}; CHECK(nlohmann::json::sax_parse(str, &sax, nlohmann::json::input_format_t::json)); 
CHECK(j.start.lines_read == 0); - CHECK(j.start.chars_read_current_line == 1); + CHECK(j.start.chars_read_current_line == 0); CHECK(j["array"].start.lines_read == 1); - CHECK(j["array"].start.chars_read_current_line == 13); + CHECK(j["array"].start.chars_read_current_line == 12); CHECK(j["array"][0].start.lines_read == 2); - CHECK(j["array"][0].start.chars_read_current_line == 5); + CHECK(j["array"][0].start.chars_read_current_line == 4); CHECK(j["array"][0].stop.lines_read == 2); CHECK(j["array"][0].stop.chars_read_current_line == 15); CHECK(j["array"][1].start.lines_read == 3); - CHECK(j["array"][1].start.chars_read_current_line == 5); + CHECK(j["array"][1].start.chars_read_current_line == 4); CHECK(j["array"][1].stop.lines_read == 3); CHECK(j["array"][1].stop.chars_read_current_line == 6); CHECK(j["array"][2].start.lines_read == 4); - CHECK(j["array"][2].start.chars_read_current_line == 5); + CHECK(j["array"][2].start.chars_read_current_line == 4); CHECK(j["array"][2].stop.lines_read == 4); CHECK(j["array"][2].stop.chars_read_current_line == 8); CHECK(j["array"][3].start.lines_read == 5); - CHECK(j["array"][3].start.chars_read_current_line == 5); + CHECK(j["array"][3].start.chars_read_current_line == 4); CHECK(j["array"][3].stop.lines_read == 5); CHECK(j["array"][3].stop.chars_read_current_line == 7); CHECK(j["array"][4].start.lines_read == 6); //starts directly after last value.... - CHECK(j["array"][4].start.chars_read_current_line == 5); + CHECK(j["array"][4].start.chars_read_current_line == 4); CHECK(j["array"][4].stop.lines_read == 6); CHECK(j["array"][4].stop.chars_read_current_line == 8); CHECK(j["array"][5].start.lines_read == 7); - CHECK(j["array"][5].start.chars_read_current_line == 5); + CHECK(j["array"][5].start.chars_read_current_line == 4); CHECK(j["array"][5].stop.lines_read == 7); CHECK(j["array"][5].stop.chars_read_current_line == 9);