diff --git a/src/parser/RdfParser.cpp b/src/parser/RdfParser.cpp index 2e0d69265..929b57752 100644 --- a/src/parser/RdfParser.cpp +++ b/src/parser/RdfParser.cpp @@ -189,8 +189,8 @@ bool TurtleParser::object() { // check blank Node first because _: also could look like a prefix // TODO Currently collections and blankNodePropertyLists do not work // on dblp when using the relaxed parser. Is this fixable? - if (blankNode() || literal() || iri() || collection() || - blankNodePropertyList()) { + if (blankNode() || blankNodePropertyList() || collection() || literal() || + iri()) { emitTriple(); return true; } else { @@ -702,14 +702,16 @@ bool TurtleParser::pnameLnRelaxed() { // is ok tok_.skipWhitespaceAndComments(); auto view = tok_.view(); - auto pos = view.find(':'); - if (pos == string::npos) { + auto pos = view.find_first_of(" \t\r\n,;)]:"); + // If we find any whitespace or other characters that end a structure in + // Turtle before we find a ':', the parsing failed. + if (pos == string::npos || view[pos] != ':') { return false; } // these can also be part of a collection etc. // find any character that can end a pnameLn when assuming that no // escape sequences were used - auto posEnd = view.find_first_of(" \t\r\n,;", pos); + auto posEnd = view.find_first_of(" \t\r\n,;)]", pos); if (posEnd == string::npos) { // make tests work posEnd = view.size(); diff --git a/test/RdfParserTest.cpp b/test/RdfParserTest.cpp index 21e96ae41..0fd6c2a71 100644 --- a/test/RdfParserTest.cpp +++ b/test/RdfParserTest.cpp @@ -31,15 +31,18 @@ auto iri = [](std::string_view s) { // `rule` (a member function of `Parser` that returns a bool). Return the // parser, if the call to `rule` returns true, else return `std::nullopt`. template -std::optional parseRule(const std::string& input) { +auto parseRule = + [](const std::string& input, + decltype(Parser::prefixMap_) prefixMap = {}) -> std::optional { Parser parser; + parser.prefixMap_ = std::move(prefixMap); parser.setBlankNodePrefixOnlyForTesting(blankNodePrefix); parser.setInputStream(input); if (!std::invoke(rule, parser)) { return std::nullopt; } return parser; -} +}; // Asserts that parsing the `rule` works and that the last parse result and the // emitted triples are as expected. Returns the `Parser` instance that is used @@ -319,7 +322,7 @@ TEST(RdfParserTest, blankNodePropertyList) { } TEST(RdfParserTest, object) { - auto runCommonTests = [](auto p) { + auto runCommonTests = [](Parser p) { auto sub = iri(""); auto pred = iri(""); p.activeSubject_ = sub; @@ -346,6 +349,30 @@ TEST(RdfParserTest, object) { exp = TurtleTriple{sub, pred, "_:u_someblank"}; ASSERT_EQ(p.triples_.back(), exp); + + { + auto map = decltype(Parser::prefixMap_){}; + map["b"] = iri(""); + auto p = parseRule("[ b:blubb 42]", map); + EXPECT_TRUE(p.has_value()); + EXPECT_EQ(p.value().lastParseResult_, "_:g_0_0"); + EXPECT_EQ(p.value().triples_.size(), 2); + EXPECT_THAT( + p.value().triples_[0], + ::testing::Eq(TurtleTriple{"_:g_0_0", iri(""), 42})); + } + { + auto map = decltype(Parser::prefixMap_){}; + map["b"] = iri(""); + auto p = parseRule("(b:blubb)]", map); + EXPECT_TRUE(p.has_value()); + EXPECT_EQ(p.value().lastParseResult_, "_:g_0_0"); + EXPECT_EQ(p.value().triples_.size(), 3); + auto first = iri(""); + EXPECT_THAT( + p.value().triples_[0], + ::testing::Eq(TurtleTriple{"_:g_0_0", first, iri("")})); + } }; runCommonTests(Re2Parser{}); runCommonTests(CtreParser{});