Skip to content

Commit

Permalink
Fix #304
Browse files Browse the repository at this point in the history
  • Loading branch information
yhirose committed Sep 1, 2024
1 parent 2b02299 commit 5e33020
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 57 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,25 @@ custom_message.txt:1:8: code format error...

NOTE: If more than one element in a prioritized choice has an error message instruction, this feature may not work as you expect.

Change the Start Definition Rule
--------------------------------

We can change the start definition rule by passing its name as the last argument to the `parser` constructor (or to `load_grammar`), as shown below.

```cpp
peg::parser parser(
R"(
Start <- A
A <- B (',' B)*
B <- '[one]' / '[two]'
%whitespace <- [ \t\n]*
)",
"A" // Start Rule is "A"
)";

parser.parse(" [one] , [two] "); // OK
```

peglint - PEG syntax lint utility
---------------------------------

Expand Down
102 changes: 52 additions & 50 deletions peglib.h
Original file line number Diff line number Diff line change
Expand Up @@ -3298,18 +3298,15 @@ using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;

class ParserGenerator {
public:
static std::shared_ptr<Grammar> parse(const char *s, size_t n,
const Rules &rules, std::string &start,
bool &enablePackratParsing, Log log) {
return get_instance().perform_core(s, n, rules, start, enablePackratParsing,
log);
}
// Bundle of results handed back by ParserGenerator::parse().
struct ParserContext {
  // Generated grammar; left null when grammar generation fails.
  std::shared_ptr<Grammar> grammar;

  // Name of the rule that parsing starts from.
  std::string start;

  // Packrat-parsing flag taken from the generated grammar data.
  bool enablePackratParsing{false};
};

static std::shared_ptr<Grammar> parse(const char *s, size_t n,
std::string &start,
bool &enablePackratParsing, Log log) {
Rules dummy;
return parse(s, n, dummy, start, enablePackratParsing, log);
static ParserContext parse(const char *s, size_t n, const Rules &rules,
Log log, std::string_view start) {
return get_instance().perform_core(s, n, rules, log, start);
}

// For debugging purpose
Expand Down Expand Up @@ -3989,9 +3986,8 @@ class ParserGenerator {
return true;
}

std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
const Rules &rules, std::string &start,
bool &enablePackratParsing, Log log) {
ParserContext perform_core(const char *s, size_t n, const Rules &rules,
Log log, std::string_view requested_start) {
Data data;
auto &grammar = *data.grammar;

Expand Down Expand Up @@ -4023,7 +4019,7 @@ class ParserGenerator {
log(line.first, line.second, "syntax error", r.error_info.label);
}
}
return nullptr;
return {};
}

// User provided rules
Expand Down Expand Up @@ -4081,7 +4077,10 @@ class ParserGenerator {
}

// Set root definition
auto &start_rule = grammar[data.start];
auto start = data.start;
if (!requested_start.empty()) { start = requested_start; }

auto &start_rule = grammar[start];

// Check if the start rule has ignore operator
{
Expand All @@ -4096,7 +4095,7 @@ class ParserGenerator {
}
}

if (!ret) { return nullptr; }
if (!ret) { return {}; }

// Check missing definitions
auto referenced = std::unordered_set<std::string>{
Expand Down Expand Up @@ -4129,7 +4128,7 @@ class ParserGenerator {
}
}

if (!ret) { return nullptr; }
if (!ret) { return {}; }

// Link references
for (auto &x : grammar) {
Expand All @@ -4153,10 +4152,10 @@ class ParserGenerator {
}
}

if (!ret) { return nullptr; }
if (!ret) { return {}; }

// Check infinite loop
if (detect_infiniteLoop(data, start_rule, log, s)) { return nullptr; }
if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }

// Automatic whitespace skipping
if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
Expand All @@ -4169,15 +4168,15 @@ class ParserGenerator {
auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
start_rule.whitespaceOpe = wsp(rule.get_core_operator());

if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; }
if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
}

// Word expression
if (grammar.count(WORD_DEFINITION_NAME)) {
auto &rule = grammar[WORD_DEFINITION_NAME];
start_rule.wordOpe = rule.get_core_operator();

if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; }
if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
}

// Apply instructions
Expand All @@ -4189,9 +4188,7 @@ class ParserGenerator {
const auto &info =
std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);

if (!apply_precedence_instruction(rule, info, s, log)) {
return nullptr;
}
if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
} else if (instruction.type == "error_message") {
rule.error_message = std::any_cast<std::string>(instruction.data);
} else if (instruction.type == "no_ast_opt") {
Expand All @@ -4200,11 +4197,7 @@ class ParserGenerator {
}
}

// Set root definition
start = data.start;
enablePackratParsing = data.enablePackratParsing;

return data.grammar;
return {data.grammar, start, data.enablePackratParsing};
}

bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
Expand Down Expand Up @@ -4530,43 +4523,52 @@ class parser {
public:
parser() = default;

parser(const char *s, size_t n, const Rules &rules) {
load_grammar(s, n, rules);
parser(const char *s, size_t n, const Rules &rules,
std::string_view start = {}) {
load_grammar(s, n, rules, start);
}

parser(const char *s, size_t n) : parser(s, n, Rules()) {}
parser(const char *s, size_t n, std::string_view start = {})
: parser(s, n, Rules(), start) {}

parser(std::string_view sv, const Rules &rules)
: parser(sv.data(), sv.size(), rules) {}
parser(std::string_view sv, const Rules &rules, std::string_view start = {})
: parser(sv.data(), sv.size(), rules, start) {}

parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {}
parser(std::string_view sv, std::string_view start = {})
: parser(sv.data(), sv.size(), Rules(), start) {}

#if defined(__cpp_lib_char8_t)
parser(std::u8string_view sv, const Rules &rules)
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules) {}
parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
start) {}

parser(std::u8string_view sv)
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules()) {}
parser(std::u8string_view sv, std::string_view start = {})
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
start) {}
#endif

operator bool() { return grammar_ != nullptr; }

bool load_grammar(const char *s, size_t n, const Rules &rules) {
grammar_ = ParserGenerator::parse(s, n, rules, start_,
enablePackratParsing_, log_);
bool load_grammar(const char *s, size_t n, const Rules &rules,
std::string_view start = {}) {
auto cxt = ParserGenerator::parse(s, n, rules, log_, start);
grammar_ = cxt.grammar;
start_ = cxt.start;
enablePackratParsing_ = cxt.enablePackratParsing;
return grammar_ != nullptr;
}

bool load_grammar(const char *s, size_t n) {
return load_grammar(s, n, Rules());
bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
return load_grammar(s, n, Rules(), start);
}

bool load_grammar(std::string_view sv, const Rules &rules) {
return load_grammar(sv.data(), sv.size(), rules);
bool load_grammar(std::string_view sv, const Rules &rules,
std::string_view start = {}) {
return load_grammar(sv.data(), sv.size(), rules, start);
}

bool load_grammar(std::string_view sv) {
return load_grammar(sv.data(), sv.size());
bool load_grammar(std::string_view sv, std::string_view start = {}) {
return load_grammar(sv.data(), sv.size(), start);
}

bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
Expand Down Expand Up @@ -4671,7 +4673,7 @@ class parser {
void enable_packrat_parsing() {
if (grammar_ != nullptr) {
auto &rule = (*grammar_)[start_];
rule.enablePackratParsing = enablePackratParsing_ && true;
rule.enablePackratParsing = enablePackratParsing_;
}
}

Expand Down
45 changes: 38 additions & 7 deletions test/test1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,9 @@ TEST(GeneralTest, Skip_token_test2) {
}

TEST(GeneralTest, Custom_AST_test) {
struct CustomType { bool dummy = false; };
struct CustomType {
bool dummy = false;
};
using CustomAst = AstBase<CustomType>;

parser parser(R"(
Expand Down Expand Up @@ -646,11 +648,8 @@ TEST(GeneralTest, Calculator_test2) {
NUMBER <- [0-9]+
)";

std::string start;
bool enablePackratParsing = false;
auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start,
enablePackratParsing, nullptr);
auto &g = *grammar;
auto cxt = ParserGenerator::parse(syntax, strlen(syntax), {}, nullptr, {});
auto &g = *cxt.grammar;

// Setup actions
auto reduce = [](const SemanticValues &vs) -> long {
Expand Down Expand Up @@ -679,7 +678,7 @@ TEST(GeneralTest, Calculator_test2) {

// Parse
long val;
auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);

EXPECT_TRUE(r.ret);
EXPECT_EQ(-3, val);
Expand Down Expand Up @@ -1285,3 +1284,35 @@ TEST(GeneralTest, PassingContextAndOutputParameter) {
parser.parse<int>("42", dt, output);
EXPECT_EQ(42, output);
}

// Verifies that the start rule can be selected by name, both through the
// parser constructor and through load_grammar(), including when
// load_grammar() replaces a grammar that was loaded with the default start
// rule.
TEST(GeneralTest, SpecifyStartRule) {
  auto grammar = R"(
Start <- A
A <- B (',' B)*
B <- '[one]' / '[two]'
%whitespace <- [ \t\n]*
)";

  // Same input for every case; it is accepted by both "Start" and "A".
  const auto input = " [one] , [two] ";

  // Start rule chosen at construction time.
  {
    parser peg(grammar, "A");
    // Fail here, rather than in parse(), if the grammar did not load.
    ASSERT_TRUE(static_cast<bool>(peg));
    EXPECT_TRUE(peg.parse(input));
  }

  // Default start rule first, then reloaded with an explicit start rule.
  {
    parser peg(grammar);
    ASSERT_TRUE(static_cast<bool>(peg));
    EXPECT_TRUE(peg.parse(input));

    // load_grammar() reports success as a bool; do not discard it.
    ASSERT_TRUE(peg.load_grammar(grammar, "A"));
    EXPECT_TRUE(peg.parse(input));
  }

  // Default-constructed parser populated only through load_grammar().
  {
    parser peg;

    ASSERT_TRUE(peg.load_grammar(grammar));
    EXPECT_TRUE(peg.parse(input));

    ASSERT_TRUE(peg.load_grammar(grammar, "A"));
    EXPECT_TRUE(peg.parse(input));
  }
}

0 comments on commit 5e33020

Please sign in to comment.