Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add input adapter tests #2178

Merged
merged 4 commits into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions include/nlohmann/detail/input/input_adapters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,23 +247,23 @@ struct wide_string_input_helper<BaseInputAdapter, 2>
}
else if (wc <= 0x7FF)
{
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
utf8_bytes_filled = 2;
}
else if (0xD800 > wc or wc >= 0xE000)
{
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
utf8_bytes_filled = 3;
}
else
{
if (JSON_HEDLEY_UNLIKELY(not input.empty()))
{
const auto wc2 = static_cast<unsigned int>(input.get_character());
const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
Expand Down Expand Up @@ -339,15 +339,18 @@ struct iterator_input_adapter_factory
}
};

// This test breaks astyle formatting when inlined in a template specialization.
template<typename T>
inline constexpr bool is_iterator_of_multibyte()
struct is_iterator_of_multibyte
{
return sizeof(typename std::iterator_traits<T>::value_type) > 1;
}
using value_type = typename std::iterator_traits<T>::value_type;
enum
{
value = sizeof(value_type) > 1
};
};

template<typename IteratorType>
struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>()>>
struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
{
using iterator_type = IteratorType;
using char_type = typename std::iterator_traits<iterator_type>::value_type;
Expand Down
6 changes: 3 additions & 3 deletions include/nlohmann/detail/input/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class lexer : public lexer_base<BasicJsonType>
using token_type = typename lexer_base<BasicJsonType>::token_type;

explicit lexer(InputAdapterType&& adapter)
: ia(std::move(adapter)), decimal_point_char(static_cast<char_type>(get_decimal_point())) {}
: ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}

// delete because of pointer members
lexer(const lexer&) = delete;
Expand Down Expand Up @@ -1218,7 +1218,7 @@ class lexer : public lexer_base<BasicJsonType>
token_type scan_literal(const char_type* literal_text, const std::size_t length,
token_type return_type)
{
assert(current == literal_text[0]);
assert(std::char_traits<char_type>::to_char_type(current) == literal_text[0]);
for (std::size_t i = 1; i < length; ++i)
{
if (JSON_HEDLEY_UNLIKELY(std::char_traits<char_type>::to_char_type(get()) != literal_text[i]))
Expand Down Expand Up @@ -1523,7 +1523,7 @@ class lexer : public lexer_base<BasicJsonType>
number_float_t value_float = 0;

/// the decimal point
const char_type decimal_point_char = '.';
const char_int_type decimal_point_char = '.';
};
} // namespace detail
} // namespace nlohmann
31 changes: 17 additions & 14 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4669,23 +4669,23 @@ struct wide_string_input_helper<BaseInputAdapter, 2>
}
else if (wc <= 0x7FF)
{
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
utf8_bytes_filled = 2;
}
else if (0xD800 > wc or wc >= 0xE000)
{
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
utf8_bytes_filled = 3;
}
else
{
if (JSON_HEDLEY_UNLIKELY(not input.empty()))
{
const auto wc2 = static_cast<unsigned int>(input.get_character());
const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
Expand Down Expand Up @@ -4761,15 +4761,18 @@ struct iterator_input_adapter_factory
}
};

// This test breaks astyle formatting when inlined in a template specialization.
template<typename T>
inline constexpr bool is_iterator_of_multibyte()
struct is_iterator_of_multibyte
{
return sizeof(typename std::iterator_traits<T>::value_type) > 1;
}
using value_type = typename std::iterator_traits<T>::value_type;
enum
{
value = sizeof(value_type) > 1
};
};

template<typename IteratorType>
struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>()>>
struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
{
using iterator_type = IteratorType;
using char_type = typename std::iterator_traits<iterator_type>::value_type;
Expand Down Expand Up @@ -8177,7 +8180,7 @@ class lexer : public lexer_base<BasicJsonType>
using token_type = typename lexer_base<BasicJsonType>::token_type;

explicit lexer(InputAdapterType&& adapter)
: ia(std::move(adapter)), decimal_point_char(static_cast<char_type>(get_decimal_point())) {}
: ia(std::move(adapter)), decimal_point_char(static_cast<char_int_type>(get_decimal_point())) {}

// delete because of pointer members
lexer(const lexer&) = delete;
Expand Down Expand Up @@ -9282,7 +9285,7 @@ class lexer : public lexer_base<BasicJsonType>
token_type scan_literal(const char_type* literal_text, const std::size_t length,
token_type return_type)
{
assert(current == literal_text[0]);
assert(std::char_traits<char_type>::to_char_type(current) == literal_text[0]);
for (std::size_t i = 1; i < length; ++i)
{
if (JSON_HEDLEY_UNLIKELY(std::char_traits<char_type>::to_char_type(get()) != literal_text[i]))
Expand Down Expand Up @@ -9587,7 +9590,7 @@ class lexer : public lexer_base<BasicJsonType>
number_float_t value_float = 0;

/// the decimal point
const char_type decimal_point_char = '.';
const char_int_type decimal_point_char = '.';
};
} // namespace detail
} // namespace nlohmann
Expand Down
57 changes: 57 additions & 0 deletions test/src/unit-deserialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1039,3 +1039,60 @@ TEST_CASE("deserialization")
}));
}
}

TEST_CASE_TEMPLATE("deserialization of different character types (ASCII)", T,
char, unsigned char, signed char,
wchar_t,
char16_t, char32_t,
std::uint8_t, std::int8_t,
std::int16_t, std::uint16_t,
std::int32_t, std::uint32_t)
{
std::vector<T> v = {'t', 'r', 'u', 'e'};
CHECK(json::parse(v) == json(true));
CHECK(json::accept(v));

SaxEventLogger l;
CHECK(json::sax_parse(v, &l));
CHECK(l.events.size() == 1);
CHECK(l.events == std::vector<std::string>({"boolean(true)"}));
}

TEST_CASE_TEMPLATE("deserialization of different character types (UTF-8)", T,
char, unsigned char, std::uint8_t)
{
// a star emoji
std::vector<T> v = {'"', static_cast<T>(0xe2), static_cast<T>(0xad), static_cast<T>(0x90), static_cast<T>(0xef), static_cast<T>(0xb8), static_cast<T>(0x8f), '"'};
CHECK(json::parse(v).dump(-1, ' ', true) == "\"\\u2b50\\ufe0f\"");
CHECK(json::accept(v));

SaxEventLogger l;
CHECK(json::sax_parse(v, &l));
CHECK(l.events.size() == 1);
}

TEST_CASE_TEMPLATE("deserialization of different character types (UTF-16)", T,
char16_t, std::uint16_t)
{
// a star emoji
std::vector<T> v = {static_cast<T>('"'), static_cast<T>(0x2b50), static_cast<T>(0xfe0f), static_cast<T>('"')};
CHECK(json::parse(v).dump(-1, ' ', true) == "\"\\u2b50\\ufe0f\"");
CHECK(json::accept(v));

SaxEventLogger l;
CHECK(json::sax_parse(v, &l));
CHECK(l.events.size() == 1);
}

TEST_CASE_TEMPLATE("deserialization of different character types (UTF-32)", T,
char32_t, std::uint32_t)
{
// a star emoji
std::vector<T> v = {static_cast<T>('"'), static_cast<T>(0x2b50), static_cast<T>(0xfe0f), static_cast<T>('"')};
CHECK(json::parse(v).dump(-1, ' ', true) == "\"\\u2b50\\ufe0f\"");
CHECK(json::accept(v));

SaxEventLogger l;
CHECK(json::sax_parse(v, &l));
CHECK(l.events.size() == 1);
}