Merge pull request #696 from emmo-repo/directory_layout

Added directory_layout() function. The function suggests a directory layout from the IRIs of all the imported ontologies. The generation of catalog files has been updated and some bugs in write_catalog() have been fixed, including: - the irimap dict is no longer modified as a side effect when relative_paths is true - relative paths in irimap are not modified - absolute paths written to the catalog file are now made relative to the directory of the catalog file when relative_paths=True. read_catalog() has gotten a new argument, relative_to, which is needed when asking for relative paths. Using this argument ensures that the catalog test no longer depends on the current working directory when invoking the test.
emmo-repo · Jan 8, 2024 · 77c3bdc · 77c3bdc
2 parents 04f145f + 5b10822
commit 77c3bdc
Show file tree

Hide file tree

Showing 11 changed files with 306 additions and 69 deletions.
diff --git a/ontopy/ontology.py b/ontopy/ontology.py
@@ -35,6 +35,7 @@
     write_catalog,
     infer_version,
     convert_imported,
+    directory_layout,
     FMAP,
     IncompatibleVersion,
     isinteractive,
@@ -934,23 +935,46 @@ def save(
                 raise ValueError(
                     "`recursive` and `squash` should not both be true"
                 )
-            base = self.base_iri.rstrip("#/")
-            for onto in self.imported_ontologies:
-                obase = onto.base_iri.rstrip("#/")
-                newdir = Path(dir) / os.path.relpath(obase, base)
+            layout = directory_layout(self)
+
+            for onto, path in layout.items():
+                fname = Path(dir) / f"{path}.{fmt}"
                 onto.save(
-                    filename=None,
+                    filename=fname,
                     format=format,
-                    dir=newdir.resolve(),
+                    dir=dir,
                     mkdir=mkdir,
                     overwrite=overwrite,
-                    recursive=recursive,
-                    squash=squash,
-                    write_catalog_file=write_catalog_file,
-                    append_catalog=append_catalog,
-                    catalog_file=catalog_file,
+                    recursive=False,
+                    squash=False,
+                    write_catalog_file=False,
                 )
 
+            if write_catalog_file:
+                catalog_files = set()
+                irimap = {}
+                for onto, path in layout.items():
+                    irimap[
+                        onto.get_version(as_iri=True)
+                    ] = f"{dir}/{path}.{fmt}"
+                    catalog_files.add(Path(path).parent / catalog_file)
+
+                for catfile in catalog_files:
+                    write_catalog(
+                        irimap.copy(),
+                        output=catfile,
+                        directory=dir,
+                        append=append_catalog,
+                    )
+
+        elif write_catalog_file:
+            write_catalog(
+                {self.get_version(as_iri=True): filename},
+                output=catalog_file,
+                directory=dir,
+                append=append_catalog,
+            )
+
         if squash:
             from rdflib import (  # pylint:disable=import-outside-toplevel
                 URIRef,
@@ -980,35 +1004,13 @@ def save(
                     suffix=".owl", delete=False
                 ) as handle:
                     tmpfile = handle.name
-                super().save(tmpfile, format="rdfxml")
+                super().save(tmpfile, format="ntriples")
                 graph = rdflib.Graph()
-                graph.parse(tmpfile, format="xml")
+                graph.parse(tmpfile, format="ntriples")
                 graph.serialize(destination=filename, format=format)
             finally:
                 os.remove(tmpfile)
 
-        if write_catalog_file:
-            mappings = {}
-            base = self.base_iri.rstrip("#/")
-
-            def append(onto):
-                obase = onto.base_iri.rstrip("#/")
-                newdir = Path(dir) / os.path.relpath(obase, base)
-                newpath = newdir.resolve() / f"{onto.name}.{fmt}"
-                relpath = os.path.relpath(newpath, dir)
-                mappings[onto.get_version(as_iri=True)] = str(relpath)
-                for imported in onto.imported_ontologies:
-                    append(imported)
-
-            if recursive:
-                append(self)
-            write_catalog(
-                mappings,
-                output=catalog_file,
-                directory=dir,
-                append=append_catalog,
-            )
-
     def get_imported_ontologies(self, recursive=False):
         """Return a list with imported ontologies.
 
@@ -1939,6 +1941,14 @@ def new_annotation_property(
         """
         return self.new_entity(name, parent, "annotation_property")
 
+    def difference(self, other: owlready2.Ontology) -> set:
+        """Return a set of triples that are in this, but not in the
+        `other` ontology."""
+        # pylint: disable=invalid-name
+        s1 = set(self.get_unabbreviated_triples(blank="_:b"))
+        s2 = set(other.get_unabbreviated_triples(blank="_:b"))
+        return s1.difference(s2)
+
 
 class BlankNode:
     """Represents a blank node.
@@ -2006,31 +2016,31 @@ def _unabbreviate(
 
 
 def _get_unabbreviated_triples(
-    self, subject=None, predicate=None, obj=None, blank=None
+    onto, subject=None, predicate=None, obj=None, blank=None
 ):
     """Help function returning all matching triples unabbreviated.
 
     If `blank` is given, it will be used to represent blank nodes.
     """
     # pylint: disable=invalid-name
     abb = (
-        None if subject is None else self._abbreviate(subject),
-        None if predicate is None else self._abbreviate(predicate),
-        None if obj is None else self._abbreviate(obj),
+        None if subject is None else onto._abbreviate(subject),
+        None if predicate is None else onto._abbreviate(predicate),
+        None if obj is None else onto._abbreviate(obj),
     )
-    for s, p, o in self._get_obj_triples_spo_spo(*abb):
+    for s, p, o in onto._get_obj_triples_spo_spo(*abb):
         yield (
-            _unabbreviate(self, s, blank=blank),
-            _unabbreviate(self, p, blank=blank),
-            _unabbreviate(self, o, blank=blank),
+            _unabbreviate(onto, s, blank=blank),
+            _unabbreviate(onto, p, blank=blank),
+            _unabbreviate(onto, o, blank=blank),
         )
-    for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=None):
+    for s, p, o, d in onto._get_data_triples_spod_spod(*abb, d=None):
         yield (
-            _unabbreviate(self, s, blank=blank),
-            _unabbreviate(self, p, blank=blank),
+            _unabbreviate(onto, s, blank=blank),
+            _unabbreviate(onto, p, blank=blank),
             f'"{o}"{d}'
             if isinstance(d, str)
-            else f'"{o}"^^{_unabbreviate(self, d)}'
+            else f'"{o}"^^{_unabbreviate(onto, d)}'
             if d
             else o,
         )
diff --git a/ontopy/utils.py b/ontopy/utils.py
@@ -304,6 +304,7 @@ def read_catalog(  # pylint: disable=too-many-locals,too-many-statements,too-man
     catalog_file="catalog-v001.xml",
     baseuri=None,
     recursive=False,
+    relative_to=None,
     return_paths=False,
     visited_iris=None,
     visited_paths=None,
@@ -327,6 +328,9 @@ def read_catalog(  # pylint: disable=too-many-locals,too-many-statements,too-man
 
     If `recursive` is true, catalog files in sub-folders are also read.
 
+    If `relative_to` is given, the paths in the returned dict will be
+    relative to this path.
+
     If `return_paths` is true, a set of directory paths to source
     files is returned in addition to the default dict.
 
@@ -335,6 +339,8 @@ def read_catalog(  # pylint: disable=too-many-locals,too-many-statements,too-man
 
     A ReadCatalogError is raised if the catalog file cannot be found.
     """
+    # pylint: disable=too-many-branches
+
     # Protocols supported by urllib.request
     web_protocols = "http://", "https://", "ftp://"
     uri = str(uri)  # in case uri is a pathlib.Path object
@@ -448,13 +454,18 @@ def load_uri(uri, dirname):
                     load_catalog(catalog)
 
     load_catalog(filepath)
+
+    if relative_to:
+        for iri, path in iris.items():
+            iris[iri] = os.path.relpath(path, relative_to)
+
     if return_paths:
         return iris, dirs
     return iris
 
 
 def write_catalog(
-    mappings: dict,
+    irimap: dict,
     output: "Union[str, Path]" = "catalog-v001.xml",
     directory: "Union[str, Path]" = ".",
     relative_paths: bool = True,
@@ -463,27 +474,29 @@ def write_catalog(
     """Write catalog file do disk.
 
     Args:
-        mappings: dict mapping ontology IRIs (name) to actual locations
+        irimap: dict mapping ontology IRIs (name) to actual locations
             (URIs).  It has the same format as the dict returned by
             read_catalog().
         output: name of catalog file.
         directory: directory path to the catalog file.  Only used if `output`
             is a relative path.
-        relative_paths: whether to write absolute or relative paths to
-            for file paths inside the catalog file.
+        relative_paths: whether to write file paths inside the catalog as
+            relative paths (instead of  absolute paths).
         append: whether to append to a possible existing catalog file.
             If false, an existing file will be overwritten.
     """
-    web_protocol = "http://", "https://", "ftp://"
+    filename = Path(directory) / output
+
     if relative_paths:
-        for key, item in mappings.items():
-            if not item.startswith(web_protocol):
-                mappings[key] = os.path.relpath(item, Path(directory).resolve())
-    filename = (Path(directory) / output).resolve()
+        irimap = irimap.copy()  # don't modify provided irimap
+        for iri, path in irimap.items():
+            if os.path.isabs(path):
+                irimap[iri] = os.path.relpath(path, filename.parent)
+
     if filename.exists() and append:
         iris = read_catalog(filename)
-        iris.update(mappings)
-        mappings = iris
+        iris.update(irimap)
+        irimap = iris
 
     res = [
         '<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
@@ -492,8 +505,8 @@ def write_catalog(
         '    <group id="Folder Repository, directory=, recursive=true, '
         'Auto-Update=false, version=2" prefer="public" xml:base="">',
     ]
-    for key, value in dict(mappings).items():
-        res.append(f'        <uri name="{key}" uri="{value}"/>')
+    for iri, path in irimap.items():
+        res.append(f'        <uri name="{iri}" uri="{path}"/>')
     res.append("    </group>")
     res.append("</catalog>")
     with open(filename, "wt") as handle:
@@ -745,3 +758,63 @@ def get_format(outfile: str, default: str, fmt: str = None):
     if not fmt:
         fmt = default
     return fmt.lstrip(".")
+
+
+def directory_layout(onto):
+    """Analyse IRIs of imported ontologies and suggest a directory
+    layout for saving recursively.
+
+    Arguments:
+        onto: Ontology to analyse.
+
+    Returns:
+        layout: A dict mapping ontology objects to relative path names
+            derived from the ontology IRIs. No file name extension are
+            added.
+
+    Example:
+        Assume that our ontology `onto` has IRI `ex:onto`. If it directly
+        or indirectly imports ontologies with IRIs `ex:A/ontoA`, `ex:B/ontoB`
+        and `ex:A/C/ontoC`, this function will return the following dict:
+
+            {
+                onto: "onto",
+                ontoA: "A/ontoA",
+                ontoB: "B/ontoB",
+                ontoC: "A/C/ontoC",
+            }
+
+        where `ontoA`, `ontoB` and `ontoC` are imported Ontology objects.
+    """
+    layout = {}
+
+    def recur(o):
+        for imported in o.imported_ontologies:
+            if imported not in layout:
+                recur(imported)
+        baseiri = o.base_iri.rstrip("/#")
+
+        # Some heuristics here to reproduce the EMMO layout.
+        # It might not apply to all ontologies, so maybe it should be
+        # made optional?  Alternatively, change EMMO ontology IRIs to
+        # match the directory layout.
+        emmolayout = (
+            any(
+                oo.base_iri.startswith(baseiri + "/")
+                for oo in o.imported_ontologies
+            )
+            or o.base_iri == "http://emmo.info/emmo/mereocausality#"
+        )
+
+        layout[o] = (
+            baseiri + "/" + os.path.basename(baseiri) if emmolayout else baseiri
+        )
+
+    recur(onto)
+
+    # Strip off initial common prefix from all paths
+    prefix = os.path.commonprefix(list(layout.values()))
+    for o, path in layout.items():
+        layout[o] = path[len(prefix) :].lstrip("/")
+
+    return layout
diff --git a/tests/test_catalog.py b/tests/test_catalog.py
@@ -7,6 +7,12 @@
 
 
 def test_catalog(repo_dir: "Path", tmpdir: "Path") -> None:
+    # if True:
+    #    from pathlib import Path
+    #    repo_dir = Path(__file__).resolve().parent.parent
+    #    tmpdir = repo_dir / "tests" / "xxx"
+    #    tmpdir.mkdir()
+
     ontodir = repo_dir / "tests" / "catalogs_for_testing"
     catalog_expected = {
         "http://emmo.info/testonto/0.1.0": str(ontodir / "testonto.ttl"),
@@ -71,14 +77,23 @@ def test_catalog(repo_dir: "Path", tmpdir: "Path") -> None:
 def test_write_catalog_choosing_relative_paths(
     repo_dir: "Path", tmpdir: "Path"
 ) -> None:
+    # if True:
+    #    from pathlib import Path
+    #    import shutil
+    #    repo_dir = Path(__file__).resolve().parent.parent
+    #    tmpdir = repo_dir / "tests" / "xxx"
+    #    if tmpdir.exists():
+    #        shutil.rmtree(tmpdir)
+    #    tmpdir.mkdir()
+
     ontodir = repo_dir / "tests" / "catalogs_for_testing"
-    catalog1 = read_catalog(str(ontodir))
+    catalog1 = read_catalog(ontodir, relative_to=ontodir)
     write_catalog(
         catalog1,
         output=(tmpdir / "cat-relative-paths.xml"),
         relative_paths=True,
     )
-    catalog2 = read_catalog(str(ontodir))
+    catalog2 = read_catalog(ontodir)
     write_catalog(
         catalog2,
         output=(tmpdir / "cat-absolute-paths.xml"),
@@ -101,8 +116,8 @@ def test_write_catalog_choosing_relative_paths(
     ontodir = repo_dir / "tests" / "catalogs_for_testing"
 
     catalog_expected_relative_paths = {
-        str("tests/catalogs_for_testing/testonto.ttl"),
-        str("tests/catalogs_for_testing/models.ttl"),
+        "testonto.ttl",
+        "models.ttl",
     }
 
     catalog_expected_absolute_paths = {

diff --git a/tests/test_excelparser/onto_only_classes.xlsx b/tests/test_excelparser/onto_only_classes.xlsx
diff --git a/tests/test_excelparser/result_ontology/fromexcelonto.ttl b/tests/test_excelparser/result_ontology/fromexcelonto.ttl
@@ -12,7 +12,7 @@
         "Jesper Friis"@en,
         "Sylvain Gouttebroze"@en ;
     dcterms:title "A test domain ontology"@en ;
-    owl:imports <http://emmo.info/emmo-inferred>,
+    owl:imports <http://emmo.info/emmo>,
         <http://ontology.info/ontology> ;
     owl:versionInfo "0.01"@en .
 

diff --git a/tests/test_excelparser/result_ontology/fromexcelonto_only_classes.ttl b/tests/test_excelparser/result_ontology/fromexcelonto_only_classes.ttl
@@ -12,7 +12,7 @@
         "Jesper Friis"@en,
         "Sylvain Gouttebroze"@en ;
     dcterms:title "A test domain ontology"@en ;
-    owl:imports <http://emmo.info/emmo-inferred>,
+    owl:imports <http://emmo.info/emmo>,
         <http://ontology.info/ontology> ;
     owl:versionInfo "0.01"@en .