Skip to content

Commit

Permalink
Extend sax parser to optionally accept position information for parse…
Browse files Browse the repository at this point in the history
…d tokens
  • Loading branch information
barcode committed Dec 4, 2021
1 parent 3311d11 commit fffed12
Show file tree
Hide file tree
Showing 7 changed files with 2,542 additions and 98 deletions.
210 changes: 182 additions & 28 deletions include/nlohmann/detail/input/binary_reader.hpp

Large diffs are not rendered by default.

14 changes: 11 additions & 3 deletions include/nlohmann/detail/input/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1498,13 +1498,13 @@ class lexer : public lexer_base<BasicJsonType>
while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
}

token_type scan()
bool scan_start()
{
// initially, skip the BOM
if (position.chars_read_total == 0 && !skip_bom())
{
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
return token_type::parse_error;
return false;
}

// read next character and ignore whitespace
Expand All @@ -1515,13 +1515,17 @@ class lexer : public lexer_base<BasicJsonType>
{
if (!scan_comment())
{
return token_type::parse_error;
return false;
}

// skip following whitespace
skip_whitespace();
}
return true;
}

token_type scan_end()
{
switch (current)
{
// structural characters
Expand Down Expand Up @@ -1585,6 +1589,10 @@ class lexer : public lexer_base<BasicJsonType>
return token_type::parse_error;
}
}
/// Scans the next token: runs the preparatory phase (scan_start) and, only if
/// that succeeds, the token-recognition phase (scan_end).
/// @return the token produced by scan_end(), or token_type::parse_error when
///         scan_start() reports failure
token_type scan()
{
    if (scan_start())
    {
        return scan_end();
    }

    return token_type::parse_error;
}

private:
/// input adapter
Expand Down
45 changes: 27 additions & 18 deletions include/nlohmann/detail/input/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ class parser
, m_lexer(std::move(adapter), skip_comments)
, allow_exceptions(allow_exceptions_)
{
// read first token
get_token();
}

/*!
Expand All @@ -90,7 +88,7 @@ class parser
sax_parse_internal(&sdp);

// in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input))
if (strict && (get_token(&sdp) != token_type::end_of_input))
{
sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
Expand Down Expand Up @@ -118,7 +116,7 @@ class parser
sax_parse_internal(&sdp);

// in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input))
if (strict && (get_token(&sdp) != token_type::end_of_input))
{
sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
Expand Down Expand Up @@ -156,7 +154,7 @@ class parser
const bool result = sax_parse_internal(sax);

// strict mode: next byte must be EOF
if (result && strict && (get_token() != token_type::end_of_input))
if (result && strict && (get_token(sax) != token_type::end_of_input))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
Expand All @@ -177,6 +175,8 @@ class parser
// value to avoid a goto (see comment where set to true)
bool skip_to_state_evaluation = false;

// read first token
get_token(sax);
while (true)
{
if (!skip_to_state_evaluation)
Expand All @@ -192,7 +192,7 @@ class parser
}

// closing } -> we are done
if (get_token() == token_type::end_object)
if (get_token(sax) == token_type::end_object)
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
{
Expand All @@ -214,7 +214,7 @@ class parser
}

// parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
Expand All @@ -225,7 +225,7 @@ class parser
states.push_back(false);

// parse values
get_token();
get_token(sax);
continue;
}

Expand All @@ -237,7 +237,7 @@ class parser
}

// closing ] -> we are done
if (get_token() == token_type::end_array)
if (get_token(sax) == token_type::end_array)
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
{
Expand Down Expand Up @@ -364,10 +364,10 @@ class parser
if (states.back()) // array
{
// comma -> next value
if (get_token() == token_type::value_separator)
if (get_token(sax) == token_type::value_separator)
{
// parse a new value
get_token();
get_token(sax);
continue;
}

Expand Down Expand Up @@ -397,10 +397,10 @@ class parser
// states.back() is false -> object

// comma -> next value
if (get_token() == token_type::value_separator)
if (get_token(sax) == token_type::value_separator)
{
// parse key
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::value_string))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
Expand All @@ -413,15 +413,15 @@ class parser
}

// parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType()));
}

// parse values
get_token();
get_token(sax);
continue;
}

Expand Down Expand Up @@ -449,10 +449,19 @@ class parser
}
}

/// get next token from lexer
token_type get_token()
/// get next token from lexer and pass position info to sax (if it is accepted)
template<class SAX>
token_type get_token(SAX* sax)
{
return last_token = m_lexer.scan();
if (!m_lexer.scan_start())
{
last_token = token_type::parse_error;
return token_type::parse_error;
}
detail::sax_call_next_token_start_pos(sax, m_lexer);
last_token = m_lexer.scan_end();
detail::sax_call_next_token_end_pos(sax, m_lexer);
return last_token;
}

std::string exception_message(const token_type expected, const std::string& context)
Expand Down
106 changes: 106 additions & 0 deletions include/nlohmann/detail/meta/is_sax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,112 @@ namespace nlohmann
{
namespace detail
{
/// Forwarding shim around the `next_token_start` member of a SAX handler.
///
/// The trailing return type repeats the member call expression, so `call`
/// is only a viable overload when `sax->next_token_start(ts...)` is a valid
/// expression for the given argument types.
struct sax_call_next_token_start_pos_direct
{
    /// @param sax  handler whose `next_token_start` member is invoked
    /// @param ts   arguments perfectly forwarded to the member
    /// @return the result of the member call (nothing for a void member)
    template<typename SAX, typename...Ts>
    static auto call(SAX* sax, Ts&& ...ts)
    -> decltype(sax->next_token_start(std::forward<Ts>(ts)...))
    {
        // Return the call result: the deduced return type is non-void when the
        // member returns a value, and flowing off the end of such a function
        // is undefined behavior. Returning a void expression is also valid.
        return sax->next_token_start(std::forward<Ts>(ts)...);
    }
};
/// Forwarding shim around the `next_token_end` member of a SAX handler.
///
/// The trailing return type repeats the member call expression, so `call`
/// is only a viable overload when `sax->next_token_end(ts...)` is a valid
/// expression for the given argument types.
struct sax_call_next_token_end_pos_direct
{
    /// @param sax  handler whose `next_token_end` member is invoked
    /// @param ts   arguments perfectly forwarded to the member
    /// @return the result of the member call (nothing for a void member)
    template<typename SAX, typename...Ts>
    static auto call(SAX* sax, Ts&& ...ts)
    -> decltype(sax->next_token_end(std::forward<Ts>(ts)...))
    {
        // Return the call result: the deduced return type is non-void when the
        // member returns a value, and flowing off the end of such a function
        // is undefined behavior. Returning a void expression is also valid.
        return sax->next_token_end(std::forward<Ts>(ts)...);
    }
};

/// Dispatches an optional SAX notification through DirectCaller::call.
///
/// Depending on which argument types DirectCaller::call accepts for SAX, the
/// static `call` overloads below either pass a std::size_t position, pass the
/// LexOrPos object itself, pass `lex.get_position().chars_read_total`, or do
/// nothing at all.
///
/// @tparam DirectCaller  type providing a static `call(SAX*, args...)`
/// @tparam SAX           handler type that may or may not offer the member
/// @tparam LexOrPos      either std::size_t (a bare position) or another type
///                       that is treated as a lexer
template <typename DirectCaller, typename SAX, typename LexOrPos>
struct sax_call_function
{
// true when the caller hands over a bare std::size_t instead of a lexer
static constexpr bool no_lexer = std::is_same<LexOrPos, std::size_t>::value;

// type of DirectCaller::call(SAX2*, Ts2...); only well-formed if that call is
template<typename SAX2, typename...Ts2>
using call_t = decltype(DirectCaller::call(std::declval<SAX2*>(), std::declval<Ts2>()...));

// NOTE(review): is_detected_exact is defined outside this view; presumably it
// is true only when call_t<...> is well-formed and exactly void - confirm.
static constexpr bool detected_call_with_pos =
is_detected_exact<void, call_t, SAX, std::size_t>::value;

// same check with a const LexOrPos argument; never set for a bare position
static constexpr bool detected_call_with_lex =
!no_lexer &&
is_detected_exact<void, call_t, SAX, const LexOrPos>::value;

// at least one of the two call forms was detected
static constexpr bool valid = detected_call_with_pos || detected_call_with_lex;

// Overload taking a std::size_t position: enabled when the position form was
// detected. SaxT defaults to SAX and the is_same condition rejects any other
// type.
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_pos
>::type
call(SaxT* sax, std::size_t pos)
{
DirectCaller::call(sax, pos);
}

// Fallback: enabled when neither call form was detected; does nothing.
template<typename SaxT = SAX>
static typename std::enable_if <
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::valid
>::type
call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {}

// Lexer form: enabled when LexOrPos is not std::size_t and the call with a
// LexOrPos argument was detected; the object is passed through unchanged.
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex);
}

// Lexer given, but only the position form was detected: the position is read
// from the lexer via get_position().chars_read_total and passed instead.
template<typename SaxT = SAX>
static typename std::enable_if <
sax_call_function<DirectCaller, SaxT, LexOrPos>::valid &&
std::is_same<SaxT, SAX>::value &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::no_lexer &&
!sax_call_function<DirectCaller, SaxT, LexOrPos>::detected_call_with_lex
>::type
call(SaxT* sax, const LexOrPos& lex)
{
DirectCaller::call(sax, lex.get_position().chars_read_total);
}
};

template<class SAX, class LexOrPos>
void sax_call_next_token_start_pos(SAX* sax, const LexOrPos& lexOrPos)
{
using call_t = sax_call_function<sax_call_next_token_start_pos_direct, SAX, LexOrPos>;
call_t::call(sax, lexOrPos);
}
template<class SAX, class LexOrPos>
void sax_call_next_token_end_pos(SAX* sax, const LexOrPos& lexOrPos)
{
using call_t = sax_call_function<sax_call_next_token_end_pos_direct, SAX, LexOrPos>;
call_t::call(sax, lexOrPos);
}
/// Reports both token boundaries to `sax`: first the start taken from
/// `lexOrPos1`, then the end taken from `lexOrPos2`.
/// @param sax        SAX handler to notify
/// @param lexOrPos1  lexer or position used for the start notification
/// @param lexOrPos2  lexer or position used for the end notification
template<typename SAX, typename LexOrPos1, typename LexOrPos2>
void sax_call_next_token_start_end_pos(SAX* sax,
                                       const LexOrPos1& lexOrPos1,
                                       const LexOrPos2& lexOrPos2)
{
    // start notification
    sax_call_next_token_start_pos(sax, lexOrPos1);

    // end notification
    sax_call_next_token_end_pos(sax, lexOrPos2);
}
/// Reports both token boundaries to `sax` using the same lexer or position
/// for the start and the end notification (start first, then end).
/// @param sax       SAX handler to notify
/// @param lexOrPos  lexer or position used for both notifications
template<typename SAX, typename LexOrPos>
void sax_call_next_token_start_end_pos(SAX* sax,
                                       const LexOrPos& lexOrPos)
{
    // start notification
    sax_call_next_token_start_pos(sax, lexOrPos);

    // end notification
    sax_call_next_token_end_pos(sax, lexOrPos);
}



/// Type of the expression `t.null()` for an lvalue `t` of type T; the alias
/// is only well-formed when that member call is valid.
template<typename T>
using null_function_t = decltype(std::declval<T&>().null());

Expand Down
Loading

0 comments on commit fffed12

Please sign in to comment.