Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retain literal types in collection backend #165

Merged
merged 11 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions tests/test_collection.py → tests/backends/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,20 @@ def test_collection():
ts.remove(object=EMMO.Mass)
assert set(ts.triples()) == set(triples[:-1])


# TODO: Fix handling of Literal in Collections (Issue #160, PR #165) and
# reactivate test.
#
# # Test that we can initialise from an existing collection
# coll = dlite.Collection()
# for triple in triples:
# coll.add_relation(*triple)
# ts2 = Triplestore(backend="collection", collection=coll)
# assert set(ts2.triples()) == set(triples)
#
# # Test serialising/parsing
# dump = ts.serialize(backend="rdflib")
# ts3 = Triplestore(backend="collection")
# ts3.parse(backend="rdflib", data=dump)
# assert set(ts3.triples()) == set(triples)
# Test that we can initialise from an existing collection
coll = dlite.Collection()
coll.add_relation(STRUCTURE.name, DM.hasLabel, "Strontium titanate", "@en")
coll.add_relation(STRUCTURE.masses, DM.hasUnit, "u", XSD.string)
for triple in triples[2:]:
coll.add_relation(*triple)
ts2 = Triplestore(backend="collection", collection=coll)
assert set(ts2.triples()) == set(triples)

# Test serialising/parsing
dump = ts.serialize()
ts3 = Triplestore(backend="collection")
ts3.parse(data=dump)
assert set(ts3.triples()) == set(ts.triples())
label = ts3.value(STRUCTURE.name, DM.hasLabel)
assert isinstance(label, Literal)
assert label == Literal("Strontium titanate", lang="en")
8 changes: 2 additions & 6 deletions tests/test_triplestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,8 @@ def test_triplestore( # pylint: disable=too-many-locals
standard="fno",
)

try:
ts_as_turtle = ts.serialize(format="turtle")
except NotImplementedError:
pass
else:
assert ts_as_turtle == expected_function_triplestore
ts_as_turtle = ts.serialize(format="turtle")
assert ts_as_turtle == expected_function_triplestore

# Test SPARQL query
try:
Expand Down
20 changes: 13 additions & 7 deletions tripper/backends/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,31 +66,37 @@ def triples(self, triple: "Triple") -> "Generator[Triple, None, None]":
else:
yield s, p, o

def add_triples(self, triples: "Sequence[Triple]"):
def add_triples(
self, triples: "Union[Sequence[Triple], Generator[Triple, None, None]]"
):
"""Add a sequence of triples."""
for s, p, o in triples:
# Strange complains by mypy - it assumed that
# parse_object() is returning a `str` regardless that it
# has been declared to return the union of `str` and
# `Literal`.
v = parse_object(o)
o = v if isinstance(v, str) else v.value

# Possible bug in mypy
# parse_object() is declared to return "Union[str, Literal]".
# Dispite of that complains mypy about that `v` has no
# attribute "value"
obj = v if isinstance(v, str) else str(v.value) # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Possible bug in mypy
# parse_object() is declared to return "Union[str, Literal]".
# Dispite of that complains mypy about that `v` has no
# attribute "value"
obj = v if isinstance(v, str) else str(v.value) # type: ignore
obj = v if isinstance(v, str) else str(v.value)

I have thested locally and with a draft pull request, I do not get any errors with mypy. Do you have an updated version?

Copy link
Contributor Author

@jesper-friis jesper-friis Feb 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem came when I updated mypy. I got annoyed with it and changed the return type of utils.parse_object() from Union[str, Literal] to Union[str, Any] to get rid of these errors...

The source of the problem is that mypy suddenly has got the idea that Literal has got a special meaning, since it was added to typing.

It even doesn't understand if I change the return type of utils.parse_object() to Union[str, tripper.Literal].

Using Any in the return works around the problem. I have moved and updated the comment about buggy mypy to parse_object() and removed the # type: ignore as you suggested.

d = (
None
if not isinstance(v, Literal)
else f"@{v.lang}" if v.lang else v.datatype
)
self.collection.add_relation(s, p, o, d)
self.collection.add_relation(s, p, obj, d)

def remove(self, triple: "Triple"):
"""Remove all matching triples from the backend."""
s, p, o = triple
v = parse_object(o)
o = v if isinstance(v, str) else v.value
obj = v if isinstance(v, str) else str(v.value) # type: ignore
jesper-friis marked this conversation as resolved.
Show resolved Hide resolved
d = (
None
if isinstance(v, str)
if not isinstance(v, Literal)
else f"@{v.lang}" if v.lang else v.datatype
)
# v_str = v.n3() if isinstance(v, Literal) else v
self.collection.remove_relations(s, p, o, d)
self.collection.remove_relations(s, p, obj, d)
60 changes: 31 additions & 29 deletions tripper/triplestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,33 +294,35 @@ def parse(
self,
source=None,
format=None,
backend=None,
backend_kwargs=None,
fallback_backend="rdflib",
fallback_backend_kwargs=None,
Comment on lines +297 to +298
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we didn't need to change backend/backend_kwargs to fallback_backend/fallback_backend_kwargs in the tests, it seems that we do not have adeguate tests.

Copy link
Contributor Author

@jesper-friis jesper-friis Feb 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the collection backend does not provide a parse() method, so it will use the fallback backend. That is tested when calling ts.parse() in tests/backends/test_collection.py.

**kwargs, # pylint: disable=redefined-builtin
) -> None:
"""Parse source and add the resulting triples to triplestore.

Parameters:
source: File-like object or file name.
format: Needed if format can not be inferred from source.
backend: If given, use the parse() method of the specified
backend instead of the current backend.
backend_kwargs: Dict with additional keyword arguments passed
when instantiating `backend`.
fallback_backend: If the current backend doesn't implement
parse, use the `fallback_backend` instead.
fallback_backend_kwargs: Dict with additional keyword arguments
for initialising `fallback_backend`.
Comment on lines +306 to +309
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not see where these are tested. According to codecov, they are, so currently it should be OK. But it is important that this is tested on purpose and not "by accident", e.g. by using a backend that right now does not contain parse but might in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are tested in tests/backends/test_collection.py

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I prefer tests to be a bit more explicit.

kwargs: Keyword arguments passed to the backend.
The rdflib backend supports e.g. `location` (absolute
or relative URL) and `data` (string containing the
data to be parsed) arguments.
"""
if backend and backend != self.backend_name:
if backend_kwargs is None:
backend_kwargs = {}
ts = Triplestore(backend=backend, **backend_kwargs)
ts.parse(source=source, format=format, **kwargs)
self.add_triples(ts.triples())
else:
if hasattr(self.backend, "parse"):
self._check_method("parse")
self.backend.parse(source=source, format=format, **kwargs)
else:
if fallback_backend_kwargs is None:
fallback_backend_kwargs = {}
ts = Triplestore(
backend=fallback_backend, **fallback_backend_kwargs
)
ts.parse(source=source, format=format, **kwargs)
self.add_triples(ts.triples())

if hasattr(self.backend, "namespaces"):
for prefix, namespace in self.backend.namespaces().items():
Expand All @@ -331,8 +333,8 @@ def serialize(
self,
destination=None,
format="turtle", # pylint: disable=redefined-builtin
backend=None,
backend_kwargs=None,
fallback_backend="rdflib",
fallback_backend_kwargs=None,
**kwargs,
) -> "Union[None, str]":
"""Serialise triplestore.
Expand All @@ -342,28 +344,28 @@ def serialize(
serialisation is returned.
format: Format to serialise as. Supported formats, depends on
the backend.
backend: If given, use the parse() method of the specified
backend instead of the current backend.
backend_kwargs: Dict with additional keyword arguments passed
when instantiating `backend`.
fallback_backend: If the current backend doesn't implement
serialisation, use the `fallback_backend` instead.
fallback_backend_kwargs: Dict with additional keyword arguments
for initialising `fallback_backend`.
Comment on lines +347 to +350
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as for parse above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested in tests/backends/test_collection.py

kwargs: Passed to the backend serialize() method.

Returns:
Serialized string if `destination` is None.
"""
if backend and backend != self.backend_name:
if backend_kwargs is None:
backend_kwargs = {}
ts = Triplestore(backend=backend, **backend_kwargs)
ts.add_triples(self.triples())
return ts.serialize(
if hasattr(self.backend, "parse"):
self._check_method("serialize")
return self.backend.serialize(
destination=destination, format=format, **kwargs
)

self._check_method("serialize")
return self.backend.serialize(
destination=destination, format=format, **kwargs
)
if fallback_backend_kwargs is None:
fallback_backend_kwargs = {}
ts = Triplestore(backend=fallback_backend, **fallback_backend_kwargs)
ts.add_triples(self.triples())
for prefix, iri in self.namespaces.items():
ts.bind(prefix, iri)
return ts.serialize(destination=destination, format=format, **kwargs)

def query(self, query_object, **kwargs) -> "List[Tuple[str, ...]]":
"""SPARQL query.
Expand Down
Loading