Merge pull request #102 from casework/HotFix-sparql-query-result-types
Add SPARQL query result type assertions
kchason authored Mar 20, 2023
2 parents bb18283 + 6b9e77b commit 5cd80fc
Showing 10 changed files with 78 additions and 35 deletions.
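
Most of the changed files below follow one pattern: wherever a SPARQL query result is iterated, the row (and, where needed, each binding in it) is first checked with assert isinstance(...), because rdflib types Graph.query() results loosely and mypy --strict needs that narrowing. The remaining changes bump pre-commit hook versions and adjust the mypy invocation in tests/Makefile. As a minimal, hypothetical sketch of the SELECT-side pattern (the graph data and query here are invented for illustration, not taken from the changed files):

import rdflib
import rdflib.query

graph = rdflib.Graph()
graph.parse(
    data="""
@prefix ex: <http://example.org/> .
ex:thing ex:label "hello" .
""",
    format="turtle",
)

for row in graph.query("SELECT ?label WHERE { ?s ?p ?label . }"):
    # Iterating a SELECT result is loosely typed; asserting the concrete
    # classes lets a strict type checker narrow row and row[0].
    assert isinstance(row, rdflib.query.ResultRow)
    assert isinstance(row[0], rdflib.Literal)
    print(row[0].toPython())
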
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -1,14 +1,14 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 23.1.0
     hooks:
       - id: black
   - repo: https://github.com/pycqa/flake8
-    rev: 5.0.4
+    rev: 6.0.0
     hooks:
       - id: flake8
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
         name: isort (python)
1 change: 1 addition & 0 deletions case_utils/case_file/__init__.py
@@ -39,6 +39,7 @@
 
 DEFAULT_PREFIX = "http://example.org/kb/"
 
+
 # Shortcut syntax for defining an immutable named tuple is noted here:
 # https://docs.python.org/3/library/typing.html#typing.NamedTuple
 # via the "See also" box here: https://docs.python.org/3/library/collections.html#collections.namedtuple
5 changes: 3 additions & 2 deletions case_utils/case_sparql_construct/__init__.py
@@ -98,10 +98,11 @@ def main() -> None:
     construct_query_result = in_graph.query(construct_query_object)
     _logger.debug("type(construct_query_result) = %r." % type(construct_query_result))
     _logger.debug("len(construct_query_result) = %d." % len(construct_query_result))
-    for (row_no, row) in enumerate(construct_query_result):
+    for row_no, row in enumerate(construct_query_result):
+        assert isinstance(row, tuple)
         if row_no == 0:
             _logger.debug("row[0] = %r." % (row,))
-        out_graph.add(row)
+        out_graph.add((row[0], row[1], row[2]))
 
     output_format = None
     if args.output_format is None:
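
For CONSTRUCT queries, as in the hunk above, each result row is a plain (subject, predicate, object) tuple rather than a ResultRow, so the row is asserted to be a tuple and re-packed explicitly so that Graph.add() receives a triple of the expected shape. A hypothetical stand-alone sketch of that pattern (input data and query invented for illustration):

import rdflib

in_graph = rdflib.Graph()
in_graph.parse(
    data="""
@prefix ex: <http://example.org/> .
ex:a ex:p ex:b .
""",
    format="turtle",
)

construct_query = """
PREFIX ex: <http://example.org/>
CONSTRUCT { ?s ex:q ?o . }
WHERE { ?s ex:p ?o . }
"""

out_graph = rdflib.Graph()
for row in in_graph.query(construct_query):
    # A CONSTRUCT result iterates as triples.
    assert isinstance(row, tuple)
    # Re-packing as an explicit 3-tuple satisfies Graph.add()'s triple parameter.
    out_graph.add((row[0], row[1], row[2]))

assert len(out_graph) == 1
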
23 changes: 12 additions & 11 deletions case_utils/case_sparql_select/__init__.py
@@ -86,26 +86,27 @@ def graph_and_query_to_data_frame(
     select_query_object = rdflib.plugins.sparql.processor.prepareQuery(
         select_query_text, initNs=nsdict
     )
-    for (row_no, row) in enumerate(_graph.query(select_query_object)):
+    for row_no, row in enumerate(_graph.query(select_query_object)):
+        assert isinstance(row, rdflib.query.ResultRow)
         tally = row_no + 1
         record = []
-        for (column_no, column) in enumerate(row):
+        for column_no, column in enumerate(row):
             if column is None:
                 column_value = ""
-            elif (
-                isinstance(column, rdflib.term.Literal)
-                and column.datatype == NS_XSD.hexBinary
-            ):
-                # Use hexlify to convert xsd:hexBinary to ASCII.
-                # The render to ASCII is in support of this script rendering results for website viewing.
-                # .decode() is because hexlify returns bytes.
-                column_value = binascii.hexlify(column.toPython()).decode()
+            elif isinstance(column, rdflib.term.Literal):
+                if column.datatype == NS_XSD.hexBinary:
+                    # Use hexlify to convert xsd:hexBinary to ASCII.
+                    # The render to ASCII is in support of this script rendering results for website viewing.
+                    # .decode() is because hexlify returns bytes.
+                    column_value = binascii.hexlify(column.toPython()).decode()
+                else:
+                    column_value = column.toPython()
             elif isinstance(column, rdflib.URIRef):
                 if use_prefixes:
                     column_value = graph.namespace_manager.qname(column.toPython())
                 else:
                     column_value = column.toPython()
-            else:
+            elif isinstance(column, rdflib.BNode):
                 column_value = column.toPython()
             if row_no == 0:
                 _logger.debug("row[0]column[%d] = %r." % (column_no, column_value))
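
Seeing the branching introduced above in one self-contained place may help: each SELECT binding gets rendered according to its RDF term type. The helper below is an illustrative sketch, not the module's code; the name render_term and its parameters are invented here, it renders every binding to a string for display (whereas the module keeps native Python values for its data frame), and NS_XSD is assumed to stand in for the namespace binding used in case_utils.

import binascii

import rdflib

NS_XSD = rdflib.XSD  # assumption: stands in for the NS_XSD binding used in the module


def render_term(term: rdflib.term.Node, graph: rdflib.Graph, use_prefixes: bool) -> str:
    """Render one SELECT binding to display text, by RDF term type."""
    if isinstance(term, rdflib.Literal):
        if term.datatype == NS_XSD.hexBinary:
            # hexlify returns bytes; decode to ASCII for display.
            return binascii.hexlify(term.toPython()).decode()
        return str(term.toPython())
    if isinstance(term, rdflib.URIRef):
        if use_prefixes:
            # Compact the IRI using the graph's registered prefixes.
            return graph.namespace_manager.qname(str(term))
        return str(term)
    if isinstance(term, rdflib.BNode):
        # Blank nodes render as their internal identifier.
        return str(term.toPython())
    return str(term)
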
3 changes: 2 additions & 1 deletion case_utils/ontology/src/ontology_and_version_iris.py
@@ -28,7 +28,8 @@
 
 def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
     """
-    This function is purposefully distinct from the function used in case_validate. Within this script, the publishing history of CASE and UCO is reviewed."""
+    This function is purposefully distinct from the function used in case_validate. Within this script, the publishing history of CASE and UCO is reviewed.
+    """
     concept_iri = str(n_concept)
     return (
         concept_iri.startswith("https://ontology.unifiedcyberontology.org/")
3 changes: 2 additions & 1 deletion tests/Makefile
@@ -81,12 +81,13 @@ check-isomorphic_diff: \
 	--directory isomorphic_diff \
 	check
 
-# TODO - Fix type signatures in UCO test.
+# TODO - Add and strict-check type signatures for CASE and UCO tests.
 check-mypy: \
   .venv.done.log
 	source venv/bin/activate \
 	  && mypy \
 	    --exclude case_utils/case_validate/case_test_examples/test_case_validation.py \
+	    --exclude case_utils/case_validate/uco_test_examples/test_uco_validation.py \
 	    --exclude venv \
 	    --strict \
 	    $(top_srcdir)/case_utils \
8 changes: 7 additions & 1 deletion tests/case_utils/case_file/test_case_file.py
@@ -85,7 +85,11 @@ def test_confirm_hashes(graph_case_file: rdflib.Graph) -> None:
     )
 
     for result in graph_case_file.query(query_object):
-        (l_hash_method, l_hash_value) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        assert isinstance(result[1], rdflib.Literal)
+        l_hash_method = result[0]
+        l_hash_value = result[1]
         # .toPython() with the non-XSD datatype returns the original Literal object again. Hence, str().
         hash_method = str(l_hash_method)
         hash_value = binascii.hexlify(l_hash_value.toPython()).decode().upper()
@@ -117,13 +121,15 @@ def test_confirm_mtime(
 
     n_observable_object = None
     for result in graph_case_file_disable_hashes.query(query_object):
+        assert isinstance(result, rdflib.query.ResultRow)
         (n_observable_object,) = result
     assert (
         n_observable_object is not None
     ), "File object with expected mtime not found in hashless graph."
 
     n_observable_object = None
     for result in graph_case_file.query(query_object):
+        assert isinstance(result, rdflib.query.ResultRow)
         (n_observable_object,) = result
     assert (
         n_observable_object is not None
@@ -30,6 +30,8 @@ def _test_subclass_templates_result(filename: str, expected: typing.Set[str]) ->
 }
 """
     for result in graph.query(query_string):
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.URIRef)
         n_entity = result[0]
         computed.add(n_entity.toPython())
     assert expected == computed
@@ -54,7 +56,11 @@ def _test_w3_templates_with_blank_nodes_result(filename: str) -> None:
 }
 """
     for result in graph.query(query_string):
-        (l_given_name, l_family_name) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        assert isinstance(result[1], rdflib.term.Literal)
+        l_given_name = result[0]
+        l_family_name = result[1]
         computed.add((l_given_name.toPython(), l_family_name.toPython()))
     assert expected == computed
 
@@ -36,9 +36,9 @@
 )
 
 
-def make_data_frame_to_json_table_text_parameters() -> typing.Iterator[
-    typing.Tuple[str, str, bool, bool]
-]:
+def make_data_frame_to_json_table_text_parameters() -> (
+    typing.Iterator[typing.Tuple[str, str, bool, bool]]
+):
     for use_header in [False, True]:
         for use_index in [False, True]:
             for output_mode in ["csv", "html", "json", "md", "tsv"]:
50 changes: 38 additions & 12 deletions tests/hexbinary/test_hexbinary.py
@@ -80,7 +80,9 @@ def test_sparql_syntax_bind_boolean() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -100,7 +102,9 @@ def test_pytest_syntax_xfail() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -119,7 +123,9 @@ def test_sparql_syntax_integer_coercion() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -138,7 +144,9 @@ def test_sparql_syntax_integer_cast() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -158,7 +166,9 @@ def test_sparql_cast_custom_type() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -174,7 +184,9 @@ def test_sparql_compare_hexbinary_mixcase() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -190,7 +202,9 @@ def test_sparql_compare_hexbinary_matchcase() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -206,7 +220,9 @@ def test_sparql_compare_hexbinarycanonical_matchcase() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -226,7 +242,9 @@ def test_sparql_compare_hexbinarycanonical_mixcase() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -246,7 +264,9 @@ def test_sparql_compare_hb_hbc_mixcase() -> None:
 }
 """
    ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -266,7 +286,9 @@ def test_sparql_compare_hb_hbc_mixcase_cast() -> None:
 }
 """
     ):
-        (l_value,) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.Literal)
+        l_value = result[0]
         confirmed = l_value.toPython()
     assert confirmed
 
@@ -315,7 +337,11 @@ def _query_all_value_matches(graph: rdflib.Graph) -> typing.Set[str]:
 FILTER ( ?nNode1 != ?nNode2 )
 }"""
     ):
-        (n_node1, n_node2) = result
+        assert isinstance(result, rdflib.query.ResultRow)
+        assert isinstance(result[0], rdflib.URIRef)
+        assert isinstance(result[1], rdflib.URIRef)
+        n_node1 = result[0]
+        n_node2 = result[1]
         computed.add(n_node1.toPython())
         computed.add(n_node2.toPython())
     return computed
