From 1e0298285b5a65a2858d1400e8acac377ab36b62 Mon Sep 17 00:00:00 2001 From: "Francesca L. Bleken" <48128015+francescalb@users.noreply.github.com> Date: Fri, 26 Jan 2024 15:53:22 +0100 Subject: [PATCH] Update save recursive and layout (#710) # Description Update layout and fix recursive saving of ontology * save now returns the path to the saved ontology. Existing usage is unaffected. The returned path is practical when the filename is generated rather than given. * generated layout is updated so that, when the imported ontologies have more than one distinct root (protocol+domain), each path is rooted at its domain. Otherwise the generated layout is as before. * updated test_save to check the EMMO structure and recursive saving, also for EMMO-based domain ontologies that import ontologies whose IRIs start with different roots ## Type of change - [x] Bug fix. - [x] New feature. - [x] Documentation update. - [x] Test update. ## Checklist This checklist can be used as an aid for the reviewer. - [ ] Is the code easy to read and understand? - [ ] Are comments for humans to read, not computers to disregard? - [ ] Does a new feature have an accompanying new test (in the CI or unit testing schemes)? - [ ] Has the documentation been updated as necessary? - [ ] Does this close the issue? - [ ] Is the change limited to the issue? - [ ] Are errors handled for all outcomes? - [ ] Does the new feature provide new restrictions on dependencies, and if so is this documented? 
## Comments --------- Co-authored-by: Jesper Friis --- ontopy/ontology.py | 66 +++++--- ontopy/utils.py | 24 ++- tests/test_ontology_squash.py | 23 --- tests/test_save.py | 180 +++++++++++++++++++-- tests/testonto/catalog-v001.xml | 2 + tests/testonto/dummychameo/dummychameo.ttl | 8 + tests/testonto/dummyemmo/catalog-v001.xml | 6 + tests/testonto/dummyemmo/isq.ttl | 5 + tests/testonto/dummyonto_w_dummyemmo.ttl | 14 ++ 9 files changed, 263 insertions(+), 65 deletions(-) delete mode 100644 tests/test_ontology_squash.py create mode 100644 tests/testonto/dummychameo/dummychameo.ttl create mode 100644 tests/testonto/dummyemmo/catalog-v001.xml create mode 100644 tests/testonto/dummyemmo/isq.ttl create mode 100644 tests/testonto/dummyonto_w_dummyemmo.ttl diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 7820ebee3..4857e128c 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -871,7 +871,7 @@ def save( write_catalog_file=False, append_catalog=False, catalog_file="catalog-v001.xml", - ): + ) -> Path: """Writes the ontology to file. Parameters @@ -891,10 +891,15 @@ def save( recursive: bool Whether to save imported ontologies recursively. This is commonly combined with `filename=None`, `dir` and `mkdir`. + Note that depending on the structure of the ontology and + all imports the ontology might end up in a subdirectory. + If filename is given, the ontology is saved to the given + directory. + The path to the final location is returned. squash: bool If true, rdflib will be used to save the current ontology together with all its sub-ontologies into `filename`. - It make no sense to combine this with `recursive`. + It makes no sense to combine this with `recursive`. write_catalog_file: bool Whether to also write a catalog file to disk. append_catalog: bool @@ -902,6 +907,10 @@ def save( catalog_file: str | Path Name of catalog file. If not an absolute path, it is prepended to `dir`. + + Returns + ------- + The path to the saved ontology. 
""" # pylint: disable=redefined-builtin,too-many-arguments # pylint: disable=too-many-statements,too-many-branches @@ -922,28 +931,31 @@ def save( "'Known issues' section of the README." ) ) - revmap = {value: key for key, value in FMAP.items()} if filename is None: if format: fmt = revmap.get(format, format) - filename = f"{self.name}.{fmt}" + file = f"{self.name}.{fmt}" else: raise TypeError("`filename` and `format` cannot both be None.") - filename = os.path.join(dir, filename) - dir = Path(filename).resolve().parent + else: + file = filename + filepath = os.path.join(dir, file) + returnpath = filepath + + dir = Path(filepath).resolve().parent if mkdir: - outdir = Path(filename).parent.resolve() + outdir = Path(filepath).parent.resolve() if not outdir.exists(): outdir.mkdir(parents=True) if not format: - format = guess_format(filename, fmap=FMAP) + format = guess_format(file, fmap=FMAP) fmt = revmap.get(format, format) - if overwrite and filename and os.path.exists(filename): - os.remove(filename) + if overwrite and os.path.exists(filepath): + os.remove(filepath) if recursive: if squash: @@ -951,7 +963,12 @@ def save( "`recursive` and `squash` should not both be true" ) layout = directory_layout(self) - + if filename: + layout[self] = file.rstrip(f".{fmt}") + # Update path to where the ontology is saved + # Note that filename should include format + # when given + returnpath = Path(dir) / f"{layout[self]}.{fmt}" for onto, path in layout.items(): fname = Path(dir) / f"{path}.{fmt}" onto.save( @@ -981,16 +998,7 @@ def save( directory=dir, append=append_catalog, ) - - elif write_catalog_file: - write_catalog( - {self.get_version(as_iri=True): filename}, - output=catalog_file, - directory=dir, - append=append_catalog, - ) - - if squash: + elif squash: URIRef, RDF, OWL = rdflib.URIRef, rdflib.RDF, rdflib.OWL iri = self.iri if self.iri else self.base_iri graph = self.world.as_rdflib_graph() @@ -1010,9 +1018,9 @@ def save( graph.remove((s, p, o)) 
graph.add((URIRef(self.iri), p, o)) - graph.serialize(destination=filename, format=format) + graph.serialize(destination=filepath, format=format) elif format in OWLREADY2_FORMATS: - super().save(file=filename, format=fmt) + super().save(file=filepath, format=fmt) else: # The try-finally clause is needed for cleanup and because # we have to provide delete=False to NamedTemporaryFile @@ -1040,10 +1048,20 @@ def save( ): graph.remove((s, p, o)) graph.add((rdflib.URIRef(self.iri), p, o)) - graph.serialize(destination=filename, format=format) + graph.serialize(destination=filepath, format=format) finally: os.remove(tmpfile) + if write_catalog_file and not recursive: + write_catalog( + {self.get_version(as_iri=True): filepath}, + output=catalog_file, + directory=dir, + append=append_catalog, + ) + + return Path(returnpath) + def get_imported_ontologies(self, recursive=False): """Return a list with imported ontologies. diff --git a/ontopy/utils.py b/ontopy/utils.py index fb01cc97d..9727a4664 100644 --- a/ontopy/utils.py +++ b/ontopy/utils.py @@ -786,6 +786,14 @@ def directory_layout(onto): where `ontoA`, `ontoB` and `ontoC` are imported Ontology objects. 
""" + all_imported = [ + imported.base_iri for imported in onto.indirectly_imported_ontologies() + ] + # get protocol and domain of all imported ontologies + namespace_roots = set() + for iri in all_imported: + protocol, domain, *_ = urllib.parse.urlsplit(iri) + namespace_roots.add("://".join([protocol, domain])) def recur(o): baseiri = o.base_iri.rstrip("/#") @@ -811,10 +819,18 @@ def recur(o): layout = {} recur(onto) - # Strip off initial common prefix from all paths - prefix = os.path.commonprefix(list(layout.values())) - for o, path in layout.items(): - layout[o] = path[len(prefix) :].lstrip("/") + if len(namespace_roots) == 1: + prefix = os.path.commonprefix(list(layout.values())) + for o, path in layout.items(): + layout[o] = path[len(prefix) :].lstrip("/") + else: + for o, path in layout.items(): + for namespace_root in namespace_roots: + if path.startswith(namespace_root): + layout[o] = ( + urllib.parse.urlsplit(namespace_root)[1] + + path[len(namespace_root) :] + ) return layout diff --git a/tests/test_ontology_squash.py b/tests/test_ontology_squash.py deleted file mode 100644 index 9809c72ce..000000000 --- a/tests/test_ontology_squash.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test the Ontology.save(squash=True, ...)""" - - -def test_ontology_squash(): - import re - from pathlib import Path - from ontopy import get_ontology - - repo_dir = Path(__file__).resolve().parent.parent - onto_dir = repo_dir / "tests" / "testonto" - out_dir = repo_dir / "tests" / "output" - - testonto = get_ontology(onto_dir / "testonto.ttl").load() - - testonto.save(out_dir / "testonto_squash.ttl", squash=True) - - with open(out_dir / "testonto_squash.ttl", "r") as f: - txt = f.read() - - s = re.findall(r".* a owl:Ontology", txt) - assert len(s) == 1 - assert s[0].startswith("") - assert len(re.findall(r"owl:imports", txt)) == 0 diff --git a/tests/test_save.py b/tests/test_save.py index 765aefbd0..43ae85808 100755 --- a/tests/test_save.py +++ b/tests/test_save.py @@ -26,7 +26,6 @@ def 
test_save( testonto.save(tmpdir / "testonto_saved.ttl") # check that the file is in tmpdir assert (tmpdir / "testonto_saved.ttl").exists() - testonto.save(format="rdfxml") # provide a format and filename testonto.save(tmpdir / "testonto_saved.owl", format="rdfxml") @@ -37,6 +36,7 @@ def test_save( # the file will be saved in the current directory testonto.save(format="rdfxml") assert Path(testonto.name + ".rdfxml").exists() + # check if testonto_saved.owl and testonto.rdfxml are identical files with open(tmpdir / "testonto_saved.owl") as f: owlfile = f.read() @@ -65,6 +65,9 @@ def test_save( # 4. save testonto to testonto.owl again, but with overwrite=True # 5. check that testonto.owl is the same as testonto_saved.owl # NB! this is not currently working, issue #685 + # It might be that this intentional behaviour of save should be changed. + # If so, the tests should change accordingly. + # This should be addressed in issue #685 # 1. with open(tmpdir / "testonto_saved.owl") as f: @@ -89,35 +92,184 @@ def test_save( owlfile2 = f.read() assert owlfile == owlfile2 - # Test that the ontology is saved recursively when deisred + # Test that the ontology is saved recursively only when desired testonto.save( - format="ttl", dir=tmpdir / "recursively", mkdir=True, recursive=True + format="ttl", + dir=tmpdir / "recursively", + mkdir=True, + recursive=False, ) assert (tmpdir / "recursively" / "testonto.ttl").exists() - # Recursive save is not working . 
Issue #687 - # assert (tmpdir / "recursively" / "models.ttl").exists() + assert (tmpdir / "recursively" / "models.ttl").exists() == False + + testonto.save( + format="ttl", + dir=tmpdir / "recursively", + mkdir=True, + recursive=True, + ) + assert (tmpdir / "recursively" / "models.ttl").exists() + - # squash merge during save +def test_ontology_squash(): + import re + from pathlib import Path + from ontopy import get_ontology - # Write catalogfile + repo_dir = Path(__file__).resolve().parent.parent + onto_dir = repo_dir / "tests" / "testonto" + out_dir = repo_dir / "tests" / "output" - # append_catalog + testonto = get_ontology(onto_dir / "testonto.ttl").load() - # catalog_filename + testonto.save(out_dir / "testonto_squash.ttl", squash=True) + with open(out_dir / "testonto_squash.ttl", "r") as f: + txt = f.read() -# Simple working tests without pytest getting in the way - feel free to change to pytest + s = re.findall(r".* a owl:Ontology", txt) + assert len(s) == 1 + assert s[0].startswith("") + assert len(re.findall(r"owl:imports", txt)) == 0 -if True: # Whether to test for EMMO + +# Simple working tests without pytest getting in the way - feel free to change to pytest +def test_save_emmo( + tmpdir: "Path", + repo_dir: "Path", +) -> None: + import os from pathlib import Path - from emmopy import get_emmo + from ontopy import get_ontology + + # For debugging purposes tmpdir can be set to a directory + # in the current directory: test_save_dir + # Remember to remove the directory after testing + debug = False + if debug: + tmpdir = repo_dir / "tests" / "test_save_dir" + import os + + os.makedirs(tmpdir, exist_ok=True) + emmo = get_ontology( + "https://raw.githubusercontent.com/emmo-repo/EMMO/1.0.0-beta4/emmo.ttl" + ).load() + + # Since version is missing in some imported ontologies (at least in periodic_table) + # we need to fix that. 
+ # Note that this is a fix for an error in EMMO-1.0.0-beta4 + version = emmo.get_version() + # for onto in emmo.indirectly_imported_ontologies(): + # try: + # onto.get_version(as_iri=True) + # except TypeError: + # onto.set_version(version) + # # print(onto, onto.get_version(as_iri=True)) - emmo = get_emmo() emmo.save( format="turtle", - dir=Path(__file__).absolute().parent / "outdir", + dir=tmpdir / "emmosaved", + recursive=True, + mkdir=True, + write_catalog_file=True, + ) + assert set(os.listdir(tmpdir / "emmosaved")) == { + "catalog-v001.xml", + "disciplines", + "emmo.ttl", + "mereocausality", + "multiperspective", + "perspectives", + } + + assert set(os.listdir(tmpdir / "emmosaved" / "disciplines")) == { + "materials.ttl", + "math.ttl", + "computerscience.ttl", + "chemistry.ttl", + "unitsextension.ttl", + "catalog-v001.xml", + "isq.ttl", + "periodictable.ttl", + "metrology.ttl", + "siunits.ttl", + "disciplines.ttl", + "manufacturing.ttl", + "models.ttl", + } + + +def test_save_emmo_domain_ontology( + tmpdir: "Path", + repo_dir: "Path", +) -> None: + import os + from pathlib import Path + from ontopy.utils import directory_layout + from ontopy import get_ontology + + # For debugging purposes tmpdir can be set to a directory + # in the current directory: test_save_dir + # Remember to remove the directory after testing + debug = True + if debug: + tmpdir = repo_dir / "tests" / "test_save_dir" + import os + + os.makedirs(tmpdir, exist_ok=True) + + # This test was created with the domain-electrochemistry ontology which imports + # emmo submodules as well as chameo. + # Also, it is important that the version domain-electrochemistry has base_iri + # starting with https://w3id.org/emmo/ + # while emmo and chameo start with a different root (emmo.info/emmo/) + # For faster tests a dummyontology was created. 
+ # onto = get_ontology('https://raw.githubusercontent.com/emmo-repo/domain-electrochemistry/master/electrochemistry.ttl').load() + onto = get_ontology( + repo_dir / "tests" / "testonto" / "dummyonto_w_dummyemmo.ttl" + ).load() + + outputdir = tmpdir / "saved_emmo_domain_ontology" + savedfile = onto.save( + format="rdfxml", + dir=outputdir, + recursive=True, + mkdir=True, + write_catalog_file=True, + ) + assert get_ontology(savedfile).load() + assert set(os.listdir(outputdir)) == {"emmo.info", "w3id.org"} + assert set( + os.listdir(outputdir / "emmo.info" / "emmo" / "domain" / "chameo") + ) == {"chameo.rdfxml", "catalog-v001.xml"} + assert set( + os.listdir(outputdir / "emmo.info" / "emmo" / "disciplines") + ) == {"isq.rdfxml", "catalog-v001.xml"} + assert set(os.listdir(outputdir / "w3id.org" / "emmo" / "domain")) == { + "dummyonto.rdfxml", + "catalog-v001.xml", + } + + # Test saving but giving filename. It should then be saved in the parent directory + outputdir2 = tmpdir / "saved_emmo_domain_ontology2" + savedfile2 = onto.save( + format="rdfxml", + dir=outputdir2, recursive=True, mkdir=True, write_catalog_file=True, + filename="dummyonto.rdfxml", ) + assert get_ontology(savedfile2).load() + assert set(os.listdir(outputdir2)) == { + "emmo.info", + "dummyonto.rdfxml", + "catalog-v001.xml", + } + assert set( + os.listdir(outputdir / "emmo.info" / "emmo" / "domain" / "chameo") + ) == {"chameo.rdfxml", "catalog-v001.xml"} + assert set( + os.listdir(outputdir / "emmo.info" / "emmo" / "disciplines") + ) == {"isq.rdfxml", "catalog-v001.xml"} diff --git a/tests/testonto/catalog-v001.xml b/tests/testonto/catalog-v001.xml index 2e9c269dc..acc7f7926 100644 --- a/tests/testonto/catalog-v001.xml +++ b/tests/testonto/catalog-v001.xml @@ -5,5 +5,7 @@ + + diff --git a/tests/testonto/dummychameo/dummychameo.ttl b/tests/testonto/dummychameo/dummychameo.ttl new file mode 100644 index 000000000..c17fd4b76 --- /dev/null +++ b/tests/testonto/dummychameo/dummychameo.ttl @@ -0,0 +1,8 @@ 
+@prefix : . +@prefix dcterms: . +@prefix owl: . + + a owl:Ontology ; + dcterms:license "https://creativecommons.org/licenses/by/4.0/legalcode" ; + owl:versionIRI ; + owl:versionInfo "1.0.0-beta3" . diff --git a/tests/testonto/dummyemmo/catalog-v001.xml b/tests/testonto/dummyemmo/catalog-v001.xml new file mode 100644 index 000000000..aa2fa999b --- /dev/null +++ b/tests/testonto/dummyemmo/catalog-v001.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/testonto/dummyemmo/isq.ttl b/tests/testonto/dummyemmo/isq.ttl new file mode 100644 index 000000000..5ff18bf15 --- /dev/null +++ b/tests/testonto/dummyemmo/isq.ttl @@ -0,0 +1,5 @@ +@prefix owl: . + + a owl:Ontology ; + owl:versionIRI ; + owl:versionInfo "1.0.0-beta5" . diff --git a/tests/testonto/dummyonto_w_dummyemmo.ttl b/tests/testonto/dummyonto_w_dummyemmo.ttl new file mode 100644 index 000000000..ff9b3696e --- /dev/null +++ b/tests/testonto/dummyonto_w_dummyemmo.ttl @@ -0,0 +1,14 @@ +@prefix : . +@prefix owl: . +@prefix rdf: . +@prefix dcterms: . +@base . + + rdf:type owl:Ontology ; + owl:versionIRI ; + owl:imports , + ; + dcterms:abstract "A dummy ontology importing differen ontologies to test the save and copy functions in ontopy.ontology."@en ; + dcterms:creator "Francesca L. Bleken" ; + dcterms:license "https://creativecommons.org/licenses/by/4.0/legalcode" ; + owl:versionInfo "0.1.0" .