From 6d73126ea9b9579a44e4ab42818f16f414aea1f9 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 7 Jun 2020 22:47:25 +0200 Subject: [PATCH 1/4] :rotating_light: fix warnings --- include/nlohmann/detail/input/input_adapters.hpp | 12 ++++++------ include/nlohmann/detail/input/lexer.hpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 0a10847613..923a7aaabf 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -247,15 +247,15 @@ struct wide_string_input_helper } else if (wc <= 0x7FF) { - utf8_bytes[0] = static_cast::int_type>(0xC0u | ((wc >> 6u))); - utf8_bytes[1] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); + utf8_bytes[0] = static_cast::int_type>(0xC0u | ((static_cast(wc) >> 6u))); + utf8_bytes[1] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); utf8_bytes_filled = 2; } else if (0xD800 > wc or wc >= 0xE000) { - utf8_bytes[0] = static_cast::int_type>(0xE0u | ((wc >> 12u))); - utf8_bytes[1] = static_cast::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); - utf8_bytes[2] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); + utf8_bytes[0] = static_cast::int_type>(0xE0u | ((static_cast(wc) >> 12u))); + utf8_bytes[1] = static_cast::int_type>(0x80u | ((static_cast(wc) >> 6u) & 0x3Fu)); + utf8_bytes[2] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); utf8_bytes_filled = 3; } else @@ -263,7 +263,7 @@ struct wide_string_input_helper if (JSON_HEDLEY_UNLIKELY(not input.empty())) { const auto wc2 = static_cast(input.get_character()); - const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); + const auto charcode = 0x10000u + (((static_cast(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); utf8_bytes[0] = static_cast::int_type>(0xF0u | (charcode >> 18u)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); utf8_bytes[2] = static_cast::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu)); diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index e710140b79..0ff0c7362d 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -113,7 +113,7 @@ class lexer : public lexer_base using token_type = typename lexer_base::token_type; explicit lexer(InputAdapterType&& adapter) - : ia(std::move(adapter)), decimal_point_char(static_cast(get_decimal_point())) {} + : ia(std::move(adapter)), decimal_point_char(static_cast(get_decimal_point())) {} // delete because of pointer members lexer(const lexer&) = delete; @@ -1218,7 +1218,7 @@ class lexer : public lexer_base token_type scan_literal(const char_type* literal_text, const std::size_t length, token_type return_type) { - assert(current == literal_text[0]); + assert(std::char_traits::to_char_type(current) == literal_text[0]); for (std::size_t i = 1; i < length; ++i) { if (JSON_HEDLEY_UNLIKELY(std::char_traits::to_char_type(get()) != literal_text[i])) @@ -1523,7 +1523,7 @@ class lexer : public lexer_base number_float_t value_float = 0; /// the decimal point - const char_type decimal_point_char = '.'; + const char_int_type decimal_point_char = '.'; }; } // namespace detail } // namespace nlohmann From d4bc0c39af0f199d8e830c6c09f53eb23ed824a2 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 7 Jun 2020 22:47:54 +0200 Subject: [PATCH 2/4] :white_check_mark: add test for character types --- test/src/unit-deserialization.cpp | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 15744b9eb7..631266549b 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -1039,3 +1039,60 @@ TEST_CASE("deserialization") })); } } + +TEST_CASE_TEMPLATE("deserialization of different character types", T, + char, unsigned char, signed char, + wchar_t, + char16_t, char32_t, + std::uint8_t, std::int8_t, + std::int16_t, std::uint16_t, + std::int32_t, std::uint32_t) +{ + std::vector v = {'t', 'r', 'u', 'e'}; + CHECK(json::parse(v) == json(true)); + CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); + CHECK(l.events == std::vector({"boolean(true)"})); +} + +TEST_CASE_TEMPLATE("deserialization of different character types (UTF-8)", T, + char, unsigned char, std::uint8_t) +{ + // a star emoji + std::vector v = {'"', static_cast(0xe2), static_cast(0xad), static_cast(0x90), static_cast(0xef), static_cast(0xb8), static_cast(0x8f), '"'}; + CHECK(json::parse(v).dump(-1, ' ', true) == "\"\\u2b50\\ufe0f\""); + CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); +} + +TEST_CASE_TEMPLATE("deserialization of different character types (UTF-16)", T, + char16_t, std::uint16_t) +{ + // a star emoji + std::vector v = {static_cast('"'), static_cast(0x2b50), static_cast(0xfe0f), static_cast('"')}; + CHECK(json::parse(v).dump(-1, ' ', true) == "\"\\u2b50\\ufe0f\""); + CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); +} + +TEST_CASE_TEMPLATE("deserialization of different character types (UTF-32)", T, + char32_t, std::uint32_t) +{ + // a star emoji + std::vector v = {static_cast('"'), static_cast(0x2b50), static_cast(0xfe0f), static_cast('"')}; + CHECK(json::parse(v).dump(-1, ' ', true) == "\"\\u2b50\\ufe0f\""); + CHECK(json::accept(v)); + + SaxEventLogger l; + CHECK(json::sax_parse(v, &l)); + CHECK(l.events.size() == 1); +} From 2cf16625acefe244fc508284d5c2850e08976c73 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sun, 7 Jun 2020 22:49:39 +0200 Subject: [PATCH 3/4] :white_check_mark: add test for character types --- test/src/unit-deserialization.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 631266549b..88b1f3c5c9 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -1040,7 +1040,7 @@ TEST_CASE("deserialization") } } -TEST_CASE_TEMPLATE("deserialization of different character types", T, +TEST_CASE_TEMPLATE("deserialization of different character types (ASCII)", T, char, unsigned char, signed char, wchar_t, char16_t, char32_t, From ae04426c856ef5f5bd6d54fea069836ba3499f94 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Mon, 8 Jun 2020 20:07:15 +0200 Subject: [PATCH 4/4] :checkered_flag: fix for MSVC (see https://github.com/nlohmann/json/pull/2178#issuecomment-640622532) --- .../nlohmann/detail/input/input_adapters.hpp | 13 +++++--- single_include/nlohmann/json.hpp | 31 ++++++++++--------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 923a7aaabf..ed9db0f6f0 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -339,15 +339,18 @@ struct iterator_input_adapter_factory } }; -// This test breaks astyle formatting when inlined in a template specialization. template -inline constexpr bool is_iterator_of_multibyte() +struct is_iterator_of_multibyte { - return sizeof(typename std::iterator_traits::value_type) > 1; -} + using value_type = typename std::iterator_traits::value_type; + enum + { + value = sizeof(value_type) > 1 + }; +}; template -struct iterator_input_adapter_factory()>> +struct iterator_input_adapter_factory::value>> { using iterator_type = IteratorType; using char_type = typename std::iterator_traits::value_type; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index dd9b21c4f4..e45a756f3b 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -4669,15 +4669,15 @@ struct wide_string_input_helper } else if (wc <= 0x7FF) { - utf8_bytes[0] = static_cast::int_type>(0xC0u | ((wc >> 6u))); - utf8_bytes[1] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); + utf8_bytes[0] = static_cast::int_type>(0xC0u | ((static_cast(wc) >> 6u))); + utf8_bytes[1] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); utf8_bytes_filled = 2; } else if (0xD800 > wc or wc >= 0xE000) { - utf8_bytes[0] = static_cast::int_type>(0xE0u | ((wc >> 12u))); - utf8_bytes[1] = static_cast::int_type>(0x80u | ((wc >> 6u) & 0x3Fu)); - utf8_bytes[2] = static_cast::int_type>(0x80u | (wc & 0x3Fu)); + utf8_bytes[0] = static_cast::int_type>(0xE0u | ((static_cast(wc) >> 12u))); + utf8_bytes[1] = static_cast::int_type>(0x80u | ((static_cast(wc) >> 6u) & 0x3Fu)); + utf8_bytes[2] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); utf8_bytes_filled = 3; } else @@ -4685,7 +4685,7 @@ struct wide_string_input_helper if (JSON_HEDLEY_UNLIKELY(not input.empty())) { const auto wc2 = static_cast(input.get_character()); - const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); + const auto charcode = 0x10000u + (((static_cast(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); utf8_bytes[0] = static_cast::int_type>(0xF0u | (charcode >> 18u)); utf8_bytes[1] = static_cast::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); utf8_bytes[2] = static_cast::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu)); @@ -4761,15 +4761,18 @@ struct iterator_input_adapter_factory } }; -// This test breaks astyle formatting when inlined in a template specialization. template -inline constexpr bool is_iterator_of_multibyte() +struct is_iterator_of_multibyte { - return sizeof(typename std::iterator_traits::value_type) > 1; -} + using value_type = typename std::iterator_traits::value_type; + enum + { + value = sizeof(value_type) > 1 + }; +}; template -struct iterator_input_adapter_factory()>> +struct iterator_input_adapter_factory::value>> { using iterator_type = IteratorType; using char_type = typename std::iterator_traits::value_type; @@ -8177,7 +8180,7 @@ class lexer : public lexer_base using token_type = typename lexer_base::token_type; explicit lexer(InputAdapterType&& adapter) - : ia(std::move(adapter)), decimal_point_char(static_cast(get_decimal_point())) {} + : ia(std::move(adapter)), decimal_point_char(static_cast(get_decimal_point())) {} // delete because of pointer members lexer(const lexer&) = delete; @@ -9282,7 +9285,7 @@ class lexer : public lexer_base token_type scan_literal(const char_type* literal_text, const std::size_t length, token_type return_type) { - assert(current == literal_text[0]); + assert(std::char_traits::to_char_type(current) == literal_text[0]); for (std::size_t i = 1; i < length; ++i) { if (JSON_HEDLEY_UNLIKELY(std::char_traits::to_char_type(get()) != literal_text[i])) @@ -9587,7 +9590,7 @@ class lexer : public lexer_base number_float_t value_float = 0; /// the decimal point - const char_type decimal_point_char = '.'; + const char_int_type decimal_point_char = '.'; }; } // namespace detail } // namespace nlohmann