diff --git a/docs/tools-instructions.md b/docs/tools-instructions.md index b41dfb011..53578dcdf 100644 --- a/docs/tools-instructions.md +++ b/docs/tools-instructions.md @@ -356,3 +356,8 @@ Since the catalog file will be overwritten in the above example writing output t ```console ontoconvert --recursive emmo.ttl owl/emmo.owl ``` + + +### Bugs +Since parsing the results from the reasoner is currently broken in Owlready2 (v0.37), a workaround has been added to ontoconvert. +This workaround only supports FaCT++. Hence, HermiT and Pellet are currently not available. diff --git a/ontopy/factpluspluswrapper/sync_factpp.py b/ontopy/factpluspluswrapper/sync_factpp.py index 402c8b5a8..e92b2b906 100644 --- a/ontopy/factpluspluswrapper/sync_factpp.py +++ b/ontopy/factpluspluswrapper/sync_factpp.py @@ -23,10 +23,11 @@ RDF.type: "individual", OWL.equivalentClass: "class", OWL.equivalentProperty: "property", + OWL.sameAs: "individual", } -def sync_reasoner_factpp( # pylint: disable=too-many-locals,too-many-branches +def sync_reasoner_factpp( ontology_or_world=None, infer_property_values=False, debug=1 ): """Run FaCT++ reasoner and load the inferred relations back into @@ -41,6 +42,7 @@ def sync_reasoner_factpp( # pylint: disable=too-many-locals,too-many-branches debug : bool Whether to print debug info to standard output. 
""" + # pylint: disable=too-many-locals,too-many-branches,too-many-statements if isinstance(ontology_or_world, World): world = ontology_or_world elif isinstance(ontology_or_world, Ontology): @@ -62,7 +64,8 @@ def sync_reasoner_factpp( # pylint: disable=too-many-locals,too-many-branches world.graph.release_write_lock() # Not needed during reasoning try: - print("*** Prepare graph") + if debug: + print("*** Prepare graph") # Exclude owl:imports because they are not needed and can # cause trouble when loading the inferred ontology graph1 = rdflib.Graph() @@ -72,10 +75,12 @@ def sync_reasoner_factpp( # pylint: disable=too-many-locals,too-many-branches if predicate != OWL.imports: graph1.add((subject, predicate, obj)) - print("*** Run FaCT++ reasoner (and postprocess)") + if debug: + print("*** Run FaCT++ reasoner (and postprocess)") graph2 = FaCTPPGraph(graph1).inferred_graph() - print("*** Load inferred ontology") + if debug: + print("*** Load inferred ontology") # Check all rdfs:subClassOf relations in the inferred graph and add # them to the world if they are missing new_parents = defaultdict(list) @@ -117,7 +122,9 @@ def sync_reasoner_factpp( # pylint: disable=too-many-locals,too-many-branches if locked: world.graph.acquire_write_lock() # re-lock when applying results - print("*** Applying reasoning results") + if debug: + print("*** Applying reasoning results") + _apply_reasoning_results( world, ontology, debug, new_parents, new_equivs, entity_2_type ) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 98275f2c2..f689be8c5 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -16,8 +16,8 @@ import uuid import tempfile import types -import pathlib from typing import Union +from pathlib import Path from collections import defaultdict from collections.abc import Iterable @@ -32,6 +32,7 @@ from ontopy.utils import ( asstring, read_catalog, + write_catalog, infer_version, convert_imported, FMAP, @@ -84,11 +85,7 @@ def get_ontology(self, 
base_iri="emmo-inferred"): - "emmo-development": load latest inferred development version of EMMO """ - base_iri = ( - base_iri.as_uri() - if isinstance(base_iri, pathlib.Path) - else base_iri - ) + base_iri = base_iri.as_uri() if isinstance(base_iri, Path) else base_iri if base_iri == "emmo": base_iri = ( @@ -305,10 +302,10 @@ def get_by_label( return self.namespaces[label] if label_annotations is None: - annotations = (_.name for _ in self.label_annotations) + annotations = (a.name for a in self.label_annotations) else: annotations = ( - _.name if hasattr(_, "storid") else _ for _ in label_annotations + a.name if hasattr(a, "storid") else a for a in label_annotations ) for key in annotations: entity = self.search_one(**{key: label}) @@ -374,7 +371,7 @@ def remove_label_annotation(self, iri): raise ValueError(f"IRI not in ontology: {iri}") self._label_annotations.remove(label_annotation) - def load( # pylint: disable=too-many-arguments + def load( # pylint: disable=too-many-arguments,arguments-renamed self, only_local=False, filename=None, @@ -460,7 +457,7 @@ def _load( # pylint: disable=too-many-arguments,too-many-locals,too-many-branch catalog_file="catalog-v001.xml", **kwargs, ): - """Help function for _load().""" + """Help function for load().""" web_protocol = "http://", "https://", "ftp://" url = filename if filename else self.base_iri.rstrip("/#") @@ -612,19 +609,52 @@ def getmtime(path): ) def save( - self, filename=None, format=None, overwrite=False, **kwargs - ): # pylint: disable=redefined-builtin + self, + filename=None, + format=None, + dir=".", + mkdir=False, + overwrite=False, + recursive=False, + squash=False, + write_catalog_file=False, + append_catalog=False, + catalog_file="catalog-v001.xml", + ): """Writes the ontology to file. - If `overwrite` is `True` and filename exists, it will be removed - before saving. The default is to append an existing ontology. + Parameters + ---------- + filename: None | str | Path + Name of file to write to. 
If None, it defaults to the name + of the ontology with `format` as file extension. + format: str + Output format. The default is to infer it from `filename`. + dir: str | Path + If `filename` is a relative path, it is interpreted relative to `dir`. + mkdir: bool + Whether to create output directory if it does not exist. + overwrite: bool + If true and `filename` exists, remove the existing file before + saving. The default is to append to an existing ontology. + recursive: bool + Whether to save imported ontologies recursively. This is + commonly combined with `filename=None`, `dir` and `mkdir`. + squash: bool + If true, rdflib will be used to save the current ontology + together with all its sub-ontologies into `filename`. + It makes no sense to combine this with `recursive`. + write_catalog_file: bool + Whether to also write a catalog file to disk. + append_catalog: bool + Whether to append to an existing catalog file. + catalog_file: str | Path + Name of catalog file. If not an absolute path, it is relative + to `dir`. 
""" - if overwrite and filename and os.path.exists(filename): - os.remove(filename) - - if not format: - format = guess_format(filename, fmap=FMAP) - + # pylint: disable=redefined-builtin,too-many-arguments + # pylint: disable=too-many-statements,too-many-branches + # pylint: disable=too-many-locals,arguments-renamed if not _validate_installed_version( package="rdflib", min_version="6.0.0" ) and format == FMAP.get("ttl", ""): @@ -642,16 +672,104 @@ def save( ) ) - if format in OWLREADY2_FORMATS: - revmap = {value: key for key, value in FMAP.items()} - super().save(file=filename, format=revmap[format], **kwargs) + revmap = {value: key for key, value in FMAP.items()} + + if filename is None: + if format: + fmt = revmap.get(format, format) + filename = f"{self.name}.{fmt}" + else: + TypeError("`filename` and `format` cannot both be None.") + filename = os.path.join(dir, filename) + dir = Path(filename).resolve().parent + + if mkdir: + outdir = Path(filename).parent.resolve() + if not outdir.exists(): + outdir.mkdir(parents=True) + + if not format: + format = guess_format(filename, fmap=FMAP) + fmt = revmap.get(format, format) + + if overwrite and filename and os.path.exists(filename): + os.remove(filename) + + EMMO = rdflib.Namespace( # pylint:disable=invalid-name + "http://emmo.info/emmo#" + ) + + if recursive: + if squash: + raise ValueError( + "`recursive` and `squash` should not both be true" + ) + base = self.base_iri.rstrip("#/") + for onto in self.imported_ontologies: + obase = onto.base_iri.rstrip("#/") + newdir = Path(dir) / os.path.relpath(obase, base) + onto.save( + filename=None, + format=format, + dir=newdir.resolve(), + mkdir=mkdir, + overwrite=overwrite, + recursive=recursive, + squash=squash, + write_catalog_file=write_catalog_file, + append_catalog=append_catalog, + catalog_file=catalog_file, + ) + + if squash: + from rdflib import ( # pylint:disable=import-outside-toplevel + URIRef, + RDF, + OWL, + ) + + graph = self.world.as_rdflib_graph() + 
graph.namespace_manager.bind("emmo", EMMO) + + # Remove anonymous namespace and imports + graph.remove((URIRef("http://anonymous"), RDF.type, OWL.Ontology)) + imports = list(graph.triples((None, OWL.imports, None))) + for triple in imports: + graph.remove(triple) + + graph.serialize(destination=filename, format=format) + elif format in OWLREADY2_FORMATS: + super().save(file=filename, format=fmt) else: with tempfile.NamedTemporaryFile(suffix=".owl") as handle: tmpname = handle.name - super().save(file=tmpname, format="rdfxml", **kwargs) - graph = rdflib.Graph() - graph.parse(tmpname, format="xml") - graph.serialize(destination=filename, format=format) + try: + super().save(file=tmpname, format="rdfxml") + graph = rdflib.Graph() + graph.namespace_manager.bind("emmo", EMMO) + graph.parse(tmpname, format="xml") + graph.serialize(destination=filename, format=format) + finally: + os.remove(tmpname) + + if write_catalog_file: + mappings = {} + base = self.base_iri.rstrip("#/") + + def append(onto): + obase = onto.base_iri.rstrip("#/") + newdir = Path(dir) / os.path.relpath(obase, base) + newpath = newdir.resolve() / f"{onto.name}.{fmt}" + relpath = os.path.relpath(newpath, dir) + mappings[onto.get_version(as_iri=True)] = str(relpath) + for imported in onto.imported_ontologies: + append(imported) + + if recursive: + append(self) + write_catalog( + mappings, output=catalog_file, dir=dir, append=append_catalog + ) def get_imported_ontologies(self, recursive=False): """Return a list with imported ontologies. @@ -837,23 +955,27 @@ def sync_reasoner( Keyword arguments are passed to the underlying owlready2 function. """ - if reasoner == "Pellet": + if reasoner == "FaCT++": + sync = sync_reasoner_factpp + elif reasoner == "Pellet": sync = owlready2.sync_reasoner_pellet elif reasoner == "HermiT": sync = owlready2.sync_reasoner_hermit - elif reasoner == "FaCT++": - sync = sync_reasoner_factpp else: raise ValueError( - f"unknown reasoner {reasoner!r}. 
Supported reasoners are " - '"Pellet", "HermiT" and "FaCT++".' + f"unknown reasoner {reasoner!r}. Supported reasoners " + 'are "Pellet", "HermiT" and "FaCT++".' ) - if include_imported: - with self: - sync(**kwargs) - else: - sync([self], **kwargs) + # For some reason we must visit all entities once before running + # the reasoner... + list(self.get_entities()) + + with self: + if include_imported: + sync(self.world, **kwargs) + else: + sync(self, **kwargs) def sync_attributes( # pylint: disable=too-many-branches self, @@ -914,11 +1036,10 @@ class prefLabel(owlready2.label): if not hasattr(ind, "prefLabel"): # no prefLabel - create new annotation property.. with self: - # pylint: disable=invalid-name,missing-class-docstring # pylint: disable=function-redefined class prefLabel(owlready2.label): - pass + iri = "http://www.w3.org/2004/02/skos/core#prefLabel" ind.prefLabel = [locstr(ind.name, lang="en")] elif not ind.prefLabel: diff --git a/ontopy/utils.py b/ontopy/utils.py index 381af26b2..c04d40921 100644 --- a/ontopy/utils.py +++ b/ontopy/utils.py @@ -6,7 +6,7 @@ import re import datetime import tempfile -import types +from pathlib import Path from typing import TYPE_CHECKING import urllib.request import urllib.parse @@ -260,6 +260,7 @@ def read_catalog( # pylint: disable=too-many-locals,too-many-statements,too-man # Protocols supported by urllib.request web_protocols = "http://", "https://", "ftp://" + uri = str(uri) # in case uri is a pathlib.Path object iris = visited_iris if visited_iris else {} dirs = visited_paths if visited_paths else set() if uri in iris: @@ -378,15 +379,30 @@ def load_uri(uri, dirname): return iris -def write_catalog(mappings, output="catalog-v001.xml"): - """Writes a catalog file. +def write_catalog( + mappings: dict, + output: "Union[str, Path]" = "catalog-v001.xml", + dir: "Union[str, Path]" = ".", + append: bool = False, +): # pylint: disable=redefined-builtin + """Write catalog file do disk. 
- `mappings` is a dict mapping ontology IRIs (name) to actual - locations (uri). It has the same format as the dict returned - by read_catalog(). - - `output` it the name of the generated file. + Args: + mappings: dict mapping ontology IRIs (name) to actual locations + (URIs). It has the same format as the dict returned by + read_catalog(). + output: name of catalog file. + dir: directory path to the catalog file. Only used if `output` + is a relative path. + append: whether to append to a possible existing catalog file. + If false, an existing file will be overwritten. """ + filename = (Path(dir) / output).resolve() + if filename.exists() and append: + iris = read_catalog(filename) + iris.update(mappings) + mappings = iris + res = [ '', '') res.append(" ") res.append("") - with open(output, "wt") as handle: + with open(filename, "wt") as handle: handle.write("\n".join(res) + "\n") @@ -568,82 +584,6 @@ def recur(graph, outext): recur(graph, outext) -def squash_imported( # pylint: disable=too-many-arguments - input_ontology, - output_ontology, - input_format=None, - output_format="xml", - url_from_catalog=None, - catalog_file="catalog-v001.xml", -): - """Convert imported ontologies and squash them into a single file. - - If `url_from_catalog` is true the catalog file will be used to - load possible imported ontologies. If `url_from_catalog` is None, it will - only be used if it exists in the same directory as the input file. - - The the squash rdflib graph is returned. - - Warning: - To convert to Turtle (`.ttl`) format, you must have installed - `rdflib>=6.0.0`. See [Known issues](../../../#known-issues) for - more information. 
- - """ - inroot = os.path.dirname(os.path.abspath(input_ontology)) - - if url_from_catalog is None: - url_from_catalog = os.path.exists(os.path.join(inroot, catalog_file)) - - if url_from_catalog: - iris = read_catalog(inroot, catalog_file=catalog_file, recursive=True) - else: - iris = {} - - imported = set() - - def recur(graph): - for subject, predicate, obj in graph.triples( - (None, URIRef("http://www.w3.org/2002/07/owl#imports"), None) - ): - graph.remove((subject, predicate, obj)) - iri = iris.get(str(obj), str(obj)) - if iri not in imported: - imported.add(iri) - new_graph = Graph() - new_graph.parse(iri, format=input_format) - recur(new_graph) - for triple in new_graph.triples((None, None, None)): - graph.add(triple) - - graph = Graph() - graph.parse(input_ontology, format=input_format) - recur(graph) - if output_ontology: - if not _validate_installed_version( - package="rdflib", min_version="6.0.0" - ) and ( - output_format == FMAP.get("ttl", "") - or os.path.splitext(output_ontology)[1] == "ttl" - ): - from rdflib import ( # pylint: disable=import-outside-toplevel - __version__ as __rdflib_version__, - ) - - warnings.warn( - IncompatibleVersion( - "To correctly convert to Turtle format, rdflib must be " - "version 6.0.0 or greater, however, the detected rdflib " - "version used by your Python interpreter is " - f"{__rdflib_version__!r}. For more information see the " - "'Known issues' section of the README." - ) - ) - - graph.serialize(destination=output_ontology, format=output_format) - return graph - - def infer_version(iri, version_iri): """Infer version from IRI and versionIRI.""" if str(version_iri[: len(iri)]) == str(iri): @@ -665,26 +605,43 @@ def infer_version(iri, version_iri): return version -def annotate_with_ontology(onto, imported=True): - """Annotate all entities with the `ontology_name` and `ontology_iri`. 
+def annotate_source(onto, imported=True): + """Annotate all entities with the base IRI of the ontology using + `rdfs:isDefinedBy` annotations. - If imported is true, imported ontologies will also be annotated. + If `imported` is true, all entities in imported sub-ontologies will + also be annotated. - The ontology name and IRI are important contextual information - that is lost when ontologies are inferred and/or squashed. This - function retain this information as annotations. + This is contextual information that is otherwise lost when the ontology + is squashed and/or inferred. """ - with onto: - if "ontology_name" not in onto.world._props: - types.new_class("ontology_name", (owlready2.AnnotationProperty,)) - if "ontology_iri" not in onto.world._props: - types.new_class("ontology_iri", (owlready2.AnnotationProperty,)) - + source = onto._abbreviate( + "http://www.w3.org/2000/01/rdf-schema#isDefinedBy" + ) for entity in onto.get_entities(imported=imported): - if onto.name not in getattr(entity, "ontology_name"): - setattr(entity, "ontology_name", onto.name) - if onto.base_iri not in getattr(entity, "ontology_iri"): - setattr(entity, "ontology_iri", onto.base_iri) + triple = ( + entity.storid, + source, + onto._abbreviate(entity.namespace.ontology.base_iri), + ) + if not onto._has_obj_triple_spo(*triple): + onto._add_obj_triple_spo(*triple) + + +def rename_iris(onto, annotation="prefLabel"): + """For IRIs with the given annotation, change the name of the entity + to the value of the annotation. Also add an `skos:exactMatch` + annotation referring to the old IRI. 
+ """ + exactMatch = onto._abbreviate( # pylint:disable=invalid-name + "http://www.w3.org/2004/02/skos/core#exactMatch" + ) + for entity in onto.get_entities(): + if hasattr(entity, annotation) and getattr(entity, annotation): + onto._add_data_triple_spod( + entity.storid, exactMatch, entity.iri, "" + ) + entity.name = getattr(entity, annotation).first() def normalise_url(url): diff --git a/tests/ontopy/conftest.py b/tests/ontopy/conftest.py new file mode 100644 index 000000000..f6b2bac99 --- /dev/null +++ b/tests/ontopy/conftest.py @@ -0,0 +1,57 @@ +from pathlib import Path + +import pytest + + +# Files to skip +collect_ignore = ["interactive_test.py"] + + +# Utilities +def abbreviate(onto, iri, must_exist=True): + """Returns existing Owlready2 storid for `iri`.""" + if iri is None: + return None + abbreviater = getattr(onto, "_abbreviate") + storid = abbreviater(iri, create_if_missing=False) + if storid is None and must_exist: + raise ValueError(f"no such IRI in ontology: {iri}") + return storid + + +def get_triples(onto, s=None, p=None, o=None) -> list: + """Returns a list of triples matching spo.""" + return [ + ( + onto._unabbreviate(s_) if isinstance(s_, int) and s_ > 0 else s_, + onto._unabbreviate(p_) if isinstance(p_, int) and p_ > 0 else p_, + onto._unabbreviate(o_) if isinstance(o_, int) and o_ > 0 else o_, + ) + for s_, p_, o_, d in onto._get_triples_spod_spod( + abbreviate(onto, s), + abbreviate(onto, p), + abbreviate(onto, o, False) or o, + None, + ) + ] + + +def has_triple(onto, s=None, p=None, o=None) -> bool: + """Returns true if ontology `onto` contains the given triple. + + None may be used as a wildcard for of `s`, `p` or `o`. 
+ """ + try: + return bool(get_triples(onto, s, p, o)) + except ValueError: + return False + + +@pytest.fixture +def onto() -> "ontopy.Ontology": + """Test ontology.""" + from ontopy import get_ontology + + url = Path(__file__).parent.parent / "testonto" / "testonto.ttl" + onto = get_ontology(url).load() + return onto diff --git a/tests/ontopy/interactive_test.py b/tests/ontopy/interactive_test.py new file mode 100644 index 000000000..2bce73cbe --- /dev/null +++ b/tests/ontopy/interactive_test.py @@ -0,0 +1,14 @@ +# Please keep this file as a script that can be run interactive with +# "ipython -i" +from pathlib import Path + +from .conftest import get_triples, has_triple +from ontopy import get_ontology + + +thisdir = Path(__file__).resolve().parent + +path = thisdir / ".." / "testonto" / "testonto.ttl" +onto = get_ontology(path).load() + +emmo = get_ontology().load() diff --git a/tests/ontopy/test_utils.py b/tests/ontopy/test_utils.py new file mode 100644 index 000000000..d9f45f2eb --- /dev/null +++ b/tests/ontopy/test_utils.py @@ -0,0 +1,39 @@ +import ontopy.utils as utils +from testutils import get_triples, has_triple + + +def test_annotate_source(onto): + assert not has_triple( + onto, + "http://emmo.info/models#testclass", + "http://www.w3.org/2000/01/rdf-schema#isDefinedBy", + "http://emmo.info/models#", + ) + + utils.annotate_source(onto, imported=False) + assert not has_triple( + onto, + "http://emmo.info/models#testclass", + "http://www.w3.org/2000/01/rdf-schema#isDefinedBy", + "http://emmo.info/models#", + ) + + utils.annotate_source(onto, imported=True) + assert has_triple( + onto, + "http://emmo.info/models#testclass", + "http://www.w3.org/2000/01/rdf-schema#isDefinedBy", + "http://emmo.info/models#", + ) + + +def test_rename_iris(onto): + assert not has_triple(onto, s="http://emmo.info/models#TestClass") + utils.rename_iris(onto) + assert has_triple(onto, s="http://emmo.info/models#TestClass") + assert has_triple( + onto, + 
"http://emmo.info/models#TestClass", + "http://www.w3.org/2004/02/skos/core#exactMatch", + "http://emmo.info/models#testclass", + ) diff --git a/tests/ontopy/testutils.py b/tests/ontopy/testutils.py new file mode 100644 index 000000000..6b18c4953 --- /dev/null +++ b/tests/ontopy/testutils.py @@ -0,0 +1,40 @@ +"""Test utility functions.""" + + +def abbreviate(onto, iri, must_exist=True): + """Returns existing Owlready2 storid for `iri`.""" + if iri is None: + return None + abbreviater = getattr(onto, "_abbreviate") + storid = abbreviater(iri, create_if_missing=False) + if storid is None and must_exist: + raise ValueError(f"no such IRI in ontology: {iri}") + return storid + + +def get_triples(onto, s=None, p=None, o=None) -> list: + """Returns a list of triples matching spo.""" + return [ + ( + onto._unabbreviate(s_) if isinstance(s_, int) and s_ > 0 else s_, + onto._unabbreviate(p_) if isinstance(p_, int) and p_ > 0 else p_, + onto._unabbreviate(o_) if isinstance(o_, int) and o_ > 0 else o_, + ) + for s_, p_, o_, d in onto._get_triples_spod_spod( + abbreviate(onto, s), + abbreviate(onto, p), + abbreviate(onto, o, False) or o, + None, + ) + ] + + +def has_triple(onto, s=None, p=None, o=None) -> bool: + """Returns true if ontology `onto` contains the given triple. + + None may be used as a wildcard for of `s`, `p` or `o`. + """ + try: + return bool(get_triples(onto, s, p, o)) + except ValueError: + return False diff --git a/tools/ontoconvert b/tools/ontoconvert index 889502aba..4a13dd452 100755 --- a/tools/ontoconvert +++ b/tools/ontoconvert @@ -2,20 +2,12 @@ """Converts file format of input ontology and write it to output file(s). 
""" import argparse -import os import warnings -from rdflib.graph import Graph from rdflib.util import guess_format -from ontopy.utils import ( - convert_imported, - FMAP, - IncompatibleVersion, - squash_imported, - _validate_installed_version, -) -from ontopy.factpluspluswrapper.factppgraph import FaCTPPGraph +from ontopy import get_ontology +from ontopy.utils import annotate_source, rename_iris def main(argv: list = None): @@ -27,6 +19,8 @@ def main(argv: list = None): manually / through Python. """ + # pylint: disable=too-many-branches,too-many-statements,invalid-name + # pylint: disable=too-many-locals parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("input", help="IRI/file to OWL source.") parser.add_argument("output", help="Output file name.") @@ -47,6 +41,24 @@ def main(argv: list = None): '"trix"' ), ) + parser.add_argument( + "--output-dir", + "-d", + default=".", + help=( + "Output directory. If `output` is a relative path, it will be " + "relative to this directory." + ), + ) + parser.add_argument( + "--overwrite", + "-w", + action="store_true", + help=( + "Whether to remove `output` if it already exists. " + "The default is to append to it." + ), + ) parser.add_argument( "--no-catalog", "-n", @@ -56,31 +68,49 @@ def main(argv: list = None): help="Whether to not read catalog file even if it exists.", ) parser.add_argument( - "--inferred", + "--reasoner", + "--infer", "-i", - action="store_true", + nargs="?", + const="FaCT++", + metavar="NAME", help=( - "Add additional relations inferred by the FaCT++ reasoner to the " - "converted ontology. Implies --squash." + "Add additional relations inferred by the reasoner. Supported " + 'reasoners are "FaCT++" (default), "HermiT" and "Pellet".' ), ) + parser.add_argument( + "--no-infer-imported", + "--no-reason-imported", + action="store_true", + help="Do not infer imported ontologies.", + ) parser.add_argument( "--base-iri", "-b", help=( - "Base iri of inferred ontology. 
The default is the base iri of the" - ' input ontology with "-inferred" appended to it. Used together ' - "with --inferred." + "Base iri of inferred ontology. The default is the base iri of " + 'the input ontology with "-inferred" appended to it. Used ' + "together with --reasoner." ), ) + parser.add_argument( + "--quiet", + "-q", + action="store_true", + help="Don't print a lot of stuff to stdout during reasoning.", + ) parser.add_argument( "--recursive", "-r", action="store_true", help=( - "Whether to also convert imported ontologies recursively. The " - "output is written to a directory structure matching the input. " - "This requires Protege catalog files to be present." + "Whether to also convert imported ontologies recursively using " + "rdflib. The output is written to a directory structure matching " + "the input. " + "This option requires Protege catalog files to be present. " + "It cannot be combined with other options like --squash, " + "--inferred, --annotate-source, and --rename-iris." ), ) parser.add_argument( @@ -92,6 +122,47 @@ def main(argv: list = None): "file. Cannot be combined with --recursive." ), ) + parser.add_argument( + "--annotate-source", + "-a", + action="store_true", + help=( + "Whether to annotate all entities with be base IRI of the source " + "ontology using `dcterms:source` relations. This is contextual " + "information that is otherwise lost when ontologies are inferred " + "and/or squashed." + ), + ) + parser.add_argument( + "--rename-iris", + "-R", + nargs="?", + const="prefLabel", + metavar="ANNOTATION", + help=( + "For all entities that have the given annotation ('prefLabel' " + "by default), change the name of the entity to the value of the " + "annotation.\n" + "For all changed entities, an `equivalentTo` annotation is " + "added, referring to the old name.\n" + "This option is useful to create a copy of an ontology with " + "more human readable IRIs." 
+ ), + ) + parser.add_argument( + "--catalog-file", + "-C", + nargs="?", + const="catalog-v001.xml", + metavar="FILENAME", + help='Whether to write catalog file. Defaults to "catalog-v001.xml".', + ) + parser.add_argument( + "--append-catalog", + "-A", + action="store_true", + help="Whether to append to (possible) existing catalog file.", + ) args = parser.parse_args(args=argv) @@ -111,53 +182,130 @@ def main(argv: list = None): # Perform conversion with warnings.catch_warnings(record=True) as warnings_handle: warnings.simplefilter("always") - if args.recursive: - convert_imported( - args.input, - args.output, - input_format=input_format, - output_format=output_format, - url_from_catalog=args.url_from_catalog, - ) - elif args.inferred: - graph = squash_imported(args.input, None, input_format=input_format) - factpp_graph = FaCTPPGraph(graph) + + onto = get_ontology(args.input).load( + format=input_format, + url_from_catalog=args.url_from_catalog, + ) + + if args.annotate_source: + annotate_source(onto) + + if args.rename_iris: + rename_iris(onto, args.rename_iris) + + # owlready2.reasoning._apply_reasoning_results() is broken! + # Currently it messes up everything by adding the inferred + # rdfs:subClassOf relations as rdf:type relations :-( + # + # Temporary workaround + # Instead of calling onto.sync_reasoner() we call FaCTPPGraph + # directly. This means that HermiT and Pellet are currently not + # supported + if args.reasoner: + # pylint:disable=import-outside-toplevel + from ontopy.factpluspluswrapper.factppgraph import FaCTPPGraph + from ontopy.utils import FMAP + import rdflib + from rdflib import OWL, RDF, URIRef + + if args.reasoner != "FaCT++": + raise NotImplementedError( + "Only FaCT++ is currently supported..." + ) + if args.output_dir: + raise NotImplementedError( + "The --output-dir option is currently not supported " + "together with --reasoner." 
+ ) + if args.recursive: + raise NotImplementedError( + "The --recursive option is currently not supported " + "together with --reasoner." + ) + if args.no_infer_imported: + raise NotImplementedError( + "The --no-infer-imported option is currently not " + "supported together with --reasoner." + ) + + graph0 = onto.world.as_rdflib_graph() + graph1 = rdflib.Graph() + for s, p, o in graph0.triples((None, None, None)): + if p != OWL.imports: + graph1.add((s, p, o)) + graph2 = FaCTPPGraph(graph1).inferred_graph() + + # Remove existing ontology(ies) + remove = list(graph2.triples((None, RDF.type, OWL.Ontology))) + for triple in remove: + graph2.remove(triple) + + # Add new ontology if args.base_iri: - factpp_graph.base_iri = args.base_iri - graph2 = factpp_graph.inferred_graph() - graph2.serialize(destination=args.output, format=output_format) - elif args.squash: - squash_imported( - args.input, - args.output, - input_format=input_format, - output_format=output_format, - url_from_catalog=args.url_from_catalog, + base_iri = args.base_iri + else: + stripped = onto.base_iri.rstrip("#/") + terminal = onto.base_iri[len(stripped) :] + base_iri = f"{stripped}-inferred{terminal}" + iri = URIRef(base_iri) + graph2.add((iri, RDF.type, OWL.Ontology)) + + # Add ontology metadata + ontologies = list(graph0.subjects(RDF.type, OWL.Ontology)) + while str(ontologies[0]) == "http://anonymous": + ontologies.pop(0) + if ontologies: + for s, p, o in graph0.triples((ontologies[0], None, None)): + graph2.add((iri, p, o)) + + # Serialise + if args.output_format: + fmt = FMAP.get(args.output_format, args.output_format) + else: + fmt = guess_format(args.output, fmap=FMAP) + graph2.namespace_manager.bind( + "emmo", rdflib.Namespace("http://emmo.info/emmo#") ) + graph2.serialize(destination=args.output, format=fmt) + else: - if not _validate_installed_version( - package="rdflib", min_version="6.0.0" - ) and ( - output_format == FMAP.get("ttl", "") - or os.path.splitext(args.output)[1] == "ttl" - ): 
- from rdflib import ( # pylint: disable=import-outside-toplevel - __version__ as __rdflib_version__, - ) - warnings.warn( - IncompatibleVersion( - "To correctly convert to Turtle format, rdflib must be" - " version 6.0.0 or greater, however, the detected " - "rdflib version used by your Python interpreter is " - f"{__rdflib_version__!r}. For more information see the" - " 'Known issues' section of the README." - ) - ) + onto.save( + args.output, + format=output_format, + dir=args.output_dir, + mkdir=True, + overwrite=args.overwrite, + recursive=args.recursive, + squash=args.squash, + write_catalog_file=bool(args.catalog_file), + append_catalog=args.append_catalog, + catalog_file=args.catalog_file, + ) + + # Bring back these lines when _apply_reasoning_results() works again + # Remember to remove the Bugs subsection in the documentation - graph = Graph() - graph.parse(args.input, format=input_format) - graph.serialize(destination=args.output, format=output_format) + # if args.reasoner: + # include_imported = not args.no_infer_imported + # verbose = not args.quiet + # onto.sync_reasoner(reasoner=args.reasoner, + # include_imported=include_imported, + # debug=verbose) + # + # onto.save( + # args.output, + # format=output_format, + # dir=args.output_dir, + # mkdir=True, + # overwrite=args.overwrite, + # recursive=args.recursive, + # squash=args.squash, + # write_catalog_file=bool(args.catalog_file), + # append_catalog=args.append_catalog, + # catalog_file=args.catalog_file, + # ) for warning in warnings_handle: print(