From b6c753c96a2c10c8b5427fa1ea62bbfa9c310c84 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Tue, 4 Jan 2022 21:23:55 +0100 Subject: [PATCH] :memo: add documentation for JSON Lines --- doc/examples/json_lines.cpp | 22 +++++++++ doc/examples/json_lines.output | 4 ++ .../docs/features/parsing/json_lines.md | 49 +++++++++++++++++++ doc/mkdocs/mkdocs.yml | 1 + test/src/unit-deserialization.cpp | 42 ++++++++++++++++ 5 files changed, 118 insertions(+) create mode 100644 doc/examples/json_lines.cpp create mode 100644 doc/examples/json_lines.output create mode 100644 doc/mkdocs/docs/features/parsing/json_lines.md diff --git a/doc/examples/json_lines.cpp b/doc/examples/json_lines.cpp new file mode 100644 index 0000000000..233c81a4a1 --- /dev/null +++ b/doc/examples/json_lines.cpp @@ -0,0 +1,22 @@ +#include <sstream> +#include <iostream> +#include <nlohmann/json.hpp> + +using json = nlohmann::json; + +int main() +{ + // JSON Lines (see https://jsonlines.org) + std::stringstream input; + input << R"({"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]} +{"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]} +{"name": "May", "wins": []} +{"name": "Deloise", "wins": [["three of a kind", "5♣"]]} +)"; + + std::string line; + while (std::getline(input, line)) + { + std::cout << json::parse(line) << std::endl; + } +} diff --git a/doc/examples/json_lines.output b/doc/examples/json_lines.output new file mode 100644 index 0000000000..1b4122480b --- /dev/null +++ b/doc/examples/json_lines.output @@ -0,0 +1,4 @@ +{"name":"Gilbert","wins":[["straight","7♣"],["one pair","10♥"]]} +{"name":"Alexa","wins":[["two pair","4♠"],["two pair","9♠"]]} +{"name":"May","wins":[]} +{"name":"Deloise","wins":[["three of a kind","5♣"]]} diff --git a/doc/mkdocs/docs/features/parsing/json_lines.md b/doc/mkdocs/docs/features/parsing/json_lines.md new file mode 100644 index 0000000000..9542cb0c51 --- /dev/null +++ b/doc/mkdocs/docs/features/parsing/json_lines.md @@ -0,0 +1,49 @@ +# JSON Lines + +The [JSON 
Lines](https://jsonlines.org) format is a text format of newline-delimited JSON. In particular: + +1. The input must be UTF-8 encoded. +2. Every line must be a valid JSON value. +3. The line separator must be `\n`. As `\r` is silently ignored, `\r\n` is also supported. +4. The final character may be `\n`, but is not required to be one. + +!!! example "JSON Text example" + + ```json + {"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]} + {"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]} + {"name": "May", "wins": []} + {"name": "Deloise", "wins": [["three of a kind", "5♣"]]} + ``` + +JSON Lines input with more than one value is treated as invalid JSON by the [`parse`](../../api/basic_json/parse.md) or +[`accept`](../../api/basic_json/accept.md) functions. To process it line by line, functions like +[`std::getline`](https://en.cppreference.com/w/cpp/string/basic_string/getline) can be used: + +!!! example "Example: Parse JSON Text input line by line" + + The example below demonstrates how JSON Lines can be processed. + + ```cpp + --8<-- "examples/json_lines.cpp" + ``` + + Output: + + ```json + --8<-- "examples/json_lines.output" + ``` + +!!! warning "Note" + + Using [`operator>>`](../../api/basic_json/operator_gtgt.md) like + + ```cpp + json j; + while (input >> j) + { + std::cout << j << std::endl; + } + ``` + + with a JSON Lines input does not work, because the parser will try to parse one value after the last one. 
diff --git a/doc/mkdocs/mkdocs.yml b/doc/mkdocs/mkdocs.yml index 542d4f173f..618f7344b3 100644 --- a/doc/mkdocs/mkdocs.yml +++ b/doc/mkdocs/mkdocs.yml @@ -60,6 +60,7 @@ nav: - features/object_order.md - Parsing: - features/parsing/index.md + - features/parsing/json_lines.md - features/parsing/parse_exceptions.md - features/parsing/parser_callbacks.md - features/parsing/sax_interface.md diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index bb24241cec..45a8c0e6b1 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -1065,6 +1065,48 @@ TEST_CASE("deserialization") "start_array()" })); } + + SECTION("JSON Lines") + { + SECTION("Example file") + { + std::stringstream ss; + ss << R"({"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]} + {"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]} + {"name": "May", "wins": []} + {"name": "Deloise", "wins": [["three of a kind", "5♣"]]} +)"; + + std::string line; + int object_count = 0; + while (std::getline(ss, line)) + { + ++object_count; + CHECK(json::accept(line)); + } + + CHECK(object_count == 4); + } + + SECTION("Example file without trailing newline") + { + std::stringstream ss; + ss << R"({"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]} + {"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]} + {"name": "May", "wins": []} + {"name": "Deloise", "wins": [["three of a kind", "5♣"]]})"; + + std::string line; + int object_count = 0; + while (std::getline(ss, line)) + { + ++object_count; + CHECK(json::accept(line)); + } + + CHECK(object_count == 4); + } + } } TEST_CASE_TEMPLATE("deserialization of different character types (ASCII)", T,