From e44efbd7a53d32ee94cc2771860b1446c0b5cac8 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 1 Aug 2024 11:04:17 +0200 Subject: [PATCH] Fix a bug when using [blankNodePropertyLists] and (collections) together with the relaxed parser. TODO: Probably the solution is to throw out the relaxed parser completely. --- src/parser/TurtleParser.cpp | 12 +++++++----- test/TurtleParserTest.cpp | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/parser/TurtleParser.cpp b/src/parser/TurtleParser.cpp index e0725aa3c..edc8cdacf 100644 --- a/src/parser/TurtleParser.cpp +++ b/src/parser/TurtleParser.cpp @@ -189,8 +189,8 @@ bool TurtleParser::object() { // check blank Node first because _: also could look like a prefix // TODO Currently collections and blankNodePropertyLists do not work // on dblp when using the relaxed parser. Is this fixable? - if (blankNode() || literal() || iri() || collection() || - blankNodePropertyList()) { + if (blankNode() || blankNodePropertyList() || collection() || literal() || + iri()) { emitTriple(); return true; } else { @@ -632,14 +632,16 @@ bool TurtleParser::pnameLnRelaxed() { // is ok tok_.skipWhitespaceAndComments(); auto view = tok_.view(); - auto pos = view.find(':'); - if (pos == string::npos) { + auto pos = view.find_first_of(" \t\r\n,;)]:"); + // If we find any whitespace or other characters that end a structure in + // Turtle before we find a ':', the parsing failed. + if (pos == string::npos || view[pos] != ':') { return false; } // these can also be part of a collection etc. // find any character that can end a pnameLn when assuming that no // escape sequences were used - auto posEnd = view.find_first_of(" \t\r\n,;", pos); + auto posEnd = view.find_first_of(" \t\r\n,;)]", pos); if (posEnd == string::npos) { // make tests work posEnd = view.size(); diff --git a/test/TurtleParserTest.cpp b/test/TurtleParserTest.cpp index 44f1928bd..b8dc3d37f 100644 --- a/test/TurtleParserTest.cpp +++ b/test/TurtleParserTest.cpp @@ -29,15 +29,18 @@ auto iri = [](std::string_view s) { // `rule` (a member function of `Parser` that returns a bool). Return the // parser, if the call to `rule` returns true, else return `std::nullopt`. template -std::optional parseRule(const std::string& input) { +auto parseRule = + [](const std::string& input, + decltype(Parser::prefixMap_) prefixMap = {}) -> std::optional { Parser parser; + parser.prefixMap_ = std::move(prefixMap); parser.setBlankNodePrefixOnlyForTesting(blankNodePrefix); parser.setInputStream(input); if (!std::invoke(rule, parser)) { return std::nullopt; } return parser; -} +}; // Asserts that parsing the `rule` works and that the last parse result and the // emitted triples are as expected. Returns the `Parser` instance that is used @@ -317,7 +320,7 @@ TEST(TurtleParserTest, blankNodePropertyList) { } TEST(TurtleParserTest, object) { - auto runCommonTests = [](auto p) { + auto runCommonTests = [](Parser p) { auto sub = iri(""); auto pred = iri(""); p.activeSubject_ = sub; @@ -344,6 +347,30 @@ TEST(TurtleParserTest, object) { exp = TurtleTriple{sub, pred, "_:u_someblank"}; ASSERT_EQ(p.triples_.back(), exp); + + { + auto map = decltype(Parser::prefixMap_){}; + map["b"] = iri(""); + auto p = parseRule("[ b:blubb 42]", map); + EXPECT_TRUE(p.has_value()); + EXPECT_EQ(p.value().lastParseResult_, "_:g_0_0"); + EXPECT_EQ(p.value().triples_.size(), 2); + EXPECT_THAT( + p.value().triples_[0], + ::testing::Eq(TurtleTriple{"_:g_0_0", iri(""), 42})); + } + { + auto map = decltype(Parser::prefixMap_){}; + map["b"] = iri(""); + auto p = parseRule("(b:blubb)]", map); + EXPECT_TRUE(p.has_value()); + EXPECT_EQ(p.value().lastParseResult_, "_:g_0_0"); + EXPECT_EQ(p.value().triples_.size(), 3); + auto first = iri(""); + EXPECT_THAT( + p.value().triples_[0], + ::testing::Eq(TurtleTriple{"_:g_0_0", first, iri("")})); + } }; runCommonTests(Re2Parser{}); runCommonTests(CtreParser{});