Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sort Turtle output #1978

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
20 changes: 16 additions & 4 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,7 @@ def serialize(
format: str,
base: Optional[str],
encoding: str,
sort: bool = ...,
**args: Any,
) -> bytes: ...

Expand All @@ -1304,6 +1305,7 @@ def serialize(
base: Optional[str] = ...,
*,
encoding: str,
sort: bool = ...,
**args: Any,
) -> bytes: ...

Expand All @@ -1315,6 +1317,7 @@ def serialize(
format: str = ...,
base: Optional[str] = ...,
encoding: None = ...,
sort: bool = ...,
**args: Any,
) -> str: ...

Expand All @@ -1326,6 +1329,7 @@ def serialize(
format: str = ...,
base: Optional[str] = ...,
encoding: Optional[str] = ...,
sort: bool = ...,
**args: Any,
) -> Graph: ...

Expand All @@ -1337,6 +1341,7 @@ def serialize(
format: str = ...,
base: Optional[str] = ...,
encoding: Optional[str] = ...,
sort: bool = ...,
**args: Any,
) -> bytes | str | Graph: ...

Expand All @@ -1346,6 +1351,7 @@ def serialize(
format: str = "turtle",
base: Optional[str] = None,
encoding: Optional[str] = None,
sort: bool = False,
**args: Any,
) -> bytes | str | _GraphT:
"""
Expand Down Expand Up @@ -1389,14 +1395,20 @@ def serialize(
if destination is None:
stream = BytesIO()
if encoding is None:
serializer.serialize(stream, base=base, encoding="utf-8", **args)
serializer.serialize(
stream, base=base, encoding="utf-8", sort=sort, **args
)
return stream.getvalue().decode("utf-8")
else:
serializer.serialize(stream, base=base, encoding=encoding, **args)
serializer.serialize(
stream, base=base, encoding=encoding, sort=sort, **args
)
return stream.getvalue()
if hasattr(destination, "write"):
stream = cast(IO[bytes], destination)
serializer.serialize(stream, base=base, encoding=encoding, **args)
serializer.serialize(
stream, base=base, encoding=encoding, sort=sort, **args
)
else:
if isinstance(destination, pathlib.PurePath):
os_path = str(destination)
Expand All @@ -1412,7 +1424,7 @@ def serialize(
else:
os_path = location
with open(os_path, "wb") as stream:
serializer.serialize(stream, encoding=encoding, **args)
serializer.serialize(stream, encoding=encoding, sort=sort, **args)
return self

def print(
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/hext.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = "utf-8",
*args: Any,
**kwargs: Any,
) -> None:
if base is not None:
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/longturtle.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
spacious: Optional[bool] = None,
**kwargs: Any,
) -> None:
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/nquads.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
**kwargs: Any,
) -> None:
if base is not None:
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/nt.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = "utf-8",
*args: Any,
**kwargs: Any,
) -> None:
if base is not None:
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
**kwargs: Any,
) -> None:
"""
Expand Down
2 changes: 2 additions & 0 deletions rdflib/plugins/serializers/rdfxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
**kwargs: Any,
) -> None:
# if base is given here, use that, if not and a base is set for the graph use that
Expand Down Expand Up @@ -176,6 +177,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
**kwargs: Any,
) -> None:
self.__serialized: dict[IdentifiedNode | Literal, int] = {}
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/trig.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
spacious: Optional[bool] = None,
**kwargs: Any,
) -> None:
Expand Down
1 change: 1 addition & 0 deletions rdflib/plugins/serializers/trix.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
**kwargs: Any,
) -> None:
nm = self.store.namespace_manager
Expand Down
2 changes: 2 additions & 0 deletions rdflib/plugins/serializers/turtle.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
*args: Any,
sort: Optional[bool] = None,
spacious: Optional[bool] = None,
**kwargs: Any,
) -> None:
Expand Down
5 changes: 3 additions & 2 deletions rdflib/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


class Serializer:
def __init__(self, store: Graph):
def __init__(self, store: Graph) -> None:
self.store: Graph = store
self.encoding: str = "utf-8"
self.base: Optional[str] = None
Expand All @@ -36,7 +36,8 @@ def serialize(
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
**args: Any,
*args: Any,
**kwargs: Any,
) -> None:
"""Abstract method"""

Expand Down
94 changes: 94 additions & 0 deletions test/test_turtle_sort_issue1890.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3

# Portions of this file contributed by NIST are governed by the
# following statement:
#
# This software was developed at the National Institute of Standards
# and Technology by employees of the Federal Government in the course
# of their official duties. Pursuant to Title 17 Section 105 of the
# United States Code, this software is not subject to copyright
# protection within the United States. NIST assumes no responsibility
# whatsoever for its use by other parties, and makes no guarantees,
# expressed or implied, about its quality, reliability, or any other
# characteristic.
#
# We would appreciate acknowledgement if the software is used.

import random
from collections import defaultdict

from rdflib import RDFS, BNode, Graph, Literal, Namespace, URIRef

EX = Namespace("http://example.org/ex/")


def test_sort_semiblank_graph() -> None:
    """
    Check that sorted Turtle output is stable when blank nodes are involved.

    A graph with the same named triples and the same pattern of blank-node
    triples is rebuilt several times, inserting the blank-node triples in a
    shuffled order on each round.  Every rebuild is serialized to Turtle
    with ``sort=True``.  All rounds must yield one and the same text, and
    that text must equal the expected serialization below.
    """

    # Seeded generator: insertion order differs from round to round, but
    # the sequence of orders is reproduced on every run of the test.
    seeded_rng = random.Random(1234)

    # Each distinct serialization seen, mapped to how many rounds produced it.
    seen_outputs: defaultdict[str, int] = defaultdict(int)

    # Serialization produced by the first round (empty until then).
    first_output: str = ""

    for _ in range(1, 10):
        graph = Graph()
        graph.bind("ex", EX)
        graph.bind("rdfs", RDFS)

        for subject, comment in (
            (EX.A, "Thing A"),
            (EX.B, "Thing B"),
            (EX.C, "Thing C"),
        ):
            graph.add((subject, RDFS.comment, Literal(comment)))

        # One fresh blank node pointing at each listed URIRef; EX.B is
        # listed twice, so two blank nodes refer to it.
        targets: list[URIRef] = [EX.A, EX.B, EX.C, EX.B]
        seeded_rng.shuffle(targets)
        for target in targets:
            graph.add((BNode(), RDFS.seeAlso, target))

        # A blank node that refers to a second blank node, which in turn
        # points at the URIRef.
        nested_targets: list[URIRef] = [EX.A, EX.B, EX.C]
        seeded_rng.shuffle(nested_targets)
        for nested_target in nested_targets:
            outer = BNode()
            inner = BNode()
            graph.add((outer, EX.has, inner))
            graph.add((inner, RDFS.seeAlso, nested_target))

        output = graph.serialize(format="turtle", sort=True)
        if not first_output:
            first_output = output

        seen_outputs[output] += 1

    expected_serialization = """
@prefix ex: <http://example.org/ex/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

ex:A rdfs:comment "Thing A" .

ex:B rdfs:comment "Thing B" .

ex:C rdfs:comment "Thing C" .

[] ex:has [ rdfs:seeAlso ex:A ] .

[] ex:has [ rdfs:seeAlso ex:B ] .

[] ex:has [ rdfs:seeAlso ex:C ] .

[] rdfs:seeAlso ex:A .

[] rdfs:seeAlso ex:B .

[] rdfs:seeAlso ex:B .

[] rdfs:seeAlso ex:C .
"""

    # The first round must match the expected text ...
    assert first_output.strip() == expected_serialization.strip()
    # ... and every later round must have produced that same text.
    assert len(seen_outputs) == 1
Loading