Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix [blankNodePropertyLists] and (collections) in the relaxed parser #1431

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/parser/RdfParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ bool TurtleParser<T>::object() {
// Check for a blank node first, because `_:` could also look like a prefix.
// TODO<joka921> Currently collections and blankNodePropertyLists do not work
// on dblp when using the relaxed parser. Is this fixable?
if (blankNode() || literal() || iri() || collection() ||
blankNodePropertyList()) {
if (blankNode() || blankNodePropertyList() || collection() || literal() ||
iri()) {
emitTriple();
return true;
} else {
Expand Down Expand Up @@ -702,14 +702,16 @@ bool TurtleParser<T>::pnameLnRelaxed() {
// is ok
tok_.skipWhitespaceAndComments();
auto view = tok_.view();
auto pos = view.find(':');
if (pos == string::npos) {
auto pos = view.find_first_of(" \t\r\n,;)]:");
// If we find any whitespace or other characters that end a structure in
// Turtle before we find a ':', the parsing failed.
if (pos == string::npos || view[pos] != ':') {
return false;
}
// A pnameLn can also be part of a collection etc., so find the first
// character that can end a pnameLn, assuming that no escape sequences
// were used.
auto posEnd = view.find_first_of(" \t\r\n,;", pos);
auto posEnd = view.find_first_of(" \t\r\n,;)]", pos);
if (posEnd == string::npos) {
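// No terminating character was found, so treat the end of the input as the
// end of the pnameLn (this makes the tests work).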
posEnd = view.size();
Expand Down
33 changes: 30 additions & 3 deletions test/RdfParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@ auto iri = [](std::string_view s) {
// `rule` (a member function of `Parser` that returns a bool). Return the
// parser, if the call to `rule` returns true, else return `std::nullopt`.
template <typename Parser, auto rule, size_t blankNodePrefix = 0>
std::optional<Parser> parseRule(const std::string& input) {
auto parseRule =
[](const std::string& input,
decltype(Parser::prefixMap_) prefixMap = {}) -> std::optional<Parser> {
Parser parser;
parser.prefixMap_ = std::move(prefixMap);
parser.setBlankNodePrefixOnlyForTesting(blankNodePrefix);
parser.setInputStream(input);
if (!std::invoke(rule, parser)) {
return std::nullopt;
}
return parser;
}
};

// Asserts that parsing the `rule` works and that the last parse result and the
// emitted triples are as expected. Returns the `Parser` instance that is used
Expand Down Expand Up @@ -319,7 +322,7 @@ TEST(RdfParserTest, blankNodePropertyList) {
}

TEST(RdfParserTest, object) {
auto runCommonTests = [](auto p) {
auto runCommonTests = []<typename Parser>(Parser p) {
auto sub = iri("<sub>");
auto pred = iri("<pred>");
p.activeSubject_ = sub;
Expand All @@ -346,6 +349,30 @@ TEST(RdfParserTest, object) {

exp = TurtleTriple{sub, pred, "_:u_someblank"};
ASSERT_EQ(p.triples_.back(), exp);

{
auto map = decltype(Parser::prefixMap_){};
map["b"] = iri("<bla/>");
auto p = parseRule<Parser, &Parser::object>("[ b:blubb 42]", map);
EXPECT_TRUE(p.has_value());
EXPECT_EQ(p.value().lastParseResult_, "_:g_0_0");
EXPECT_EQ(p.value().triples_.size(), 2);
EXPECT_THAT(
p.value().triples_[0],
::testing::Eq(TurtleTriple{"_:g_0_0", iri("<bla/blubb>"), 42}));
}
{
auto map = decltype(Parser::prefixMap_){};
map["b"] = iri("<bla/>");
auto p = parseRule<Parser, &Parser::object>("(b:blubb)]", map);
EXPECT_TRUE(p.has_value());
EXPECT_EQ(p.value().lastParseResult_, "_:g_0_0");
EXPECT_EQ(p.value().triples_.size(), 3);
auto first = iri("<http://www.w3.org/1999/02/22-rdf-syntax-ns#first>");
EXPECT_THAT(
p.value().triples_[0],
::testing::Eq(TurtleTriple{"_:g_0_0", first, iri("<bla/blubb>")}));
}
};
runCommonTests(Re2Parser{});
runCommonTests(CtreParser{});
Expand Down
Loading