diff --git a/ontopy/excelparser.py b/ontopy/excelparser.py index 5e7288844..71775f48f 100755 --- a/ontopy/excelparser.py +++ b/ontopy/excelparser.py @@ -44,6 +44,7 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments imports: list = None, catalog: dict = None, force: bool = False, + input_ontology: Union[ontopy.ontology.Ontology, None] = None, ) -> Tuple[ontopy.ontology.Ontology, dict, dict]: """ Creates an ontology from an Excel-file. @@ -72,6 +73,12 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments catalog: Imported ontologies with (name, full path) key/value-pairs. force: Forcibly make an ontology by skipping concepts that are erroneously defined or other errors in the excel sheet. + input_ontology: Ontology that should be updated. + Default is None, + which means that a completely new ontology is generated. + If an input_ontology to be updated is provided, + the metadata sheet in the excel sheet will not be considered. + Returns: A tuple with the: @@ -139,6 +146,7 @@ def _relative_to_absolute_paths(path): base_iri_from_metadata=base_iri_from_metadata, catalog=catalog, force=force, + input_ontology=input_ontology, ) @@ -150,6 +158,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran base_iri_from_metadata: bool = True, catalog: dict = None, force: bool = False, + input_ontology: Union[ontopy.ontology.Ontology, None] = None, ) -> Tuple[ontopy.ontology.Ontology, dict]: """ Create an ontology from a pandas DataFrame. @@ -166,15 +175,17 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran data = data[data["prefLabel"].str.len() > 0] data.reset_index(drop=True, inplace=True) - # Make new ontology - onto, catalog = get_metadata_from_dataframe( - metadata, base_iri, imports=imports - ) - - # Set given or default base_iri if base_iri_from_metadata is False. - if not base_iri_from_metadata: - onto.base_iri = base_iri + if input_ontology: + onto = input_ontology + catalog = {} + else: # Create new ontology + onto, catalog = get_metadata_from_dataframe( + metadata, base_iri, imports=imports + ) + # Set given or default base_iri if base_iri_from_metadata is False. + if not base_iri_from_metadata: + onto.base_iri = base_iri labels = set(data["prefLabel"]) for altlabel in data["altLabel"].str.strip(): if not altlabel == "nan": @@ -192,6 +203,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran } onto.sync_python_names() + with onto: remaining_rows = set(range(len(data))) all_added_rows = [] @@ -202,7 +214,10 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran name = row["prefLabel"] try: onto.get_by_label(name) - if onto.world[onto.base_iri + name]: + if onto.base_iri in [ + a.namespace.base_iri + for a in onto.get_by_label_all(name) + ]: if not force: raise ExcelError( f'Concept "{name}" already in ontology' @@ -223,7 +238,6 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran continue except NoSuchLabelError: pass - if row["subClassOf"] == "nan": if not force: raise ExcelError(f"{row[0]} has no subClassOf") @@ -231,7 +245,6 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran concepts_with_errors["missing_parents"].append(name) else: parent_names = str(row["subClassOf"]).split(";") - parents = [] invalid_parent = False for parent_name in parent_names: @@ -383,7 +396,6 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran concepts_with_errors = { key: set(value) for key, value in concepts_with_errors.items() } - return onto, catalog, concepts_with_errors diff --git a/ontopy/ontology.py b/ontopy/ontology.py index dd75384a7..77028aa6b 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -540,7 +540,6 @@ def _load( # pylint: disable=too-many-arguments,too-many-locals,too-many-branch ): """Help function for load().""" web_protocol = "http://", "https://", "ftp://" - url = str(filename) if filename else self.base_iri.rstrip("/#") if url.startswith(web_protocol): baseurl = os.path.dirname(url) @@ -594,7 +593,6 @@ def getmtime(path): ) self.world._iri_mappings.update(iris) resolved_url = self.world._iri_mappings.get(url, url) - # Append paths from catalog file to onto_path for path in sorted(dirs, reverse=True): if path not in owlready2.onto_path: diff --git a/tests/test_excelparser/onto_update.xlsx b/tests/test_excelparser/onto_update.xlsx new file mode 100755 index 000000000..415c8cc12 Binary files /dev/null and b/tests/test_excelparser/onto_update.xlsx differ diff --git a/tests/test_excelparser/catalog-v001.xml b/tests/test_excelparser/result_ontology/catalog-v001.xml similarity index 65% rename from tests/test_excelparser/catalog-v001.xml rename to tests/test_excelparser/result_ontology/catalog-v001.xml index 698ab79a1..c7eff86b2 100644 --- a/tests/test_excelparser/catalog-v001.xml +++ b/tests/test_excelparser/result_ontology/catalog-v001.xml @@ -2,8 +2,8 @@ - - - + + + diff --git a/tests/test_excelparser/fromexcelonto.ttl b/tests/test_excelparser/result_ontology/fromexcelonto.ttl similarity index 100% rename from tests/test_excelparser/fromexcelonto.ttl rename to tests/test_excelparser/result_ontology/fromexcelonto.ttl diff --git a/tests/test_excelparser/test_excelparser.py b/tests/test_excelparser/test_excelparser.py index 34c069d4b..9ae141e34 100644 --- a/tests/test_excelparser/test_excelparser.py +++ b/tests/test_excelparser/test_excelparser.py @@ -1,8 +1,10 @@ """Test the Excel parser module.""" +import pytest from typing import TYPE_CHECKING from ontopy import get_ontology from ontopy.excelparser import create_ontology_from_excel +from ontopy.utils import NoSuchLabelError if TYPE_CHECKING: from pathlib import Path @@ -10,10 +12,19 @@ def test_excelparser(repo_dir: "Path") -> None: """Basic test for creating an ontology from an Excel file.""" - ontopath = repo_dir / "tests" / "test_excelparser" / "fromexcelonto.ttl" + ontopath = ( + repo_dir + / "tests" + / "test_excelparser" + / "result_ontology" + / "fromexcelonto.ttl" + ) onto = get_ontology(str(ontopath)).load() xlspath = repo_dir / "tests" / "test_excelparser" / "onto.xlsx" + update_xlspath = ( + repo_dir / "tests" / "test_excelparser" / "onto_update.xlsx" + ) ontology, catalog, errors = create_ontology_from_excel(xlspath, force=True) assert onto == ontology @@ -33,3 +44,13 @@ def test_excelparser(repo_dir: "Path") -> None: } assert len(ontology.get_by_label_all("Atom")) == 2 + onto_length = len(list(onto.get_entities())) + with pytest.raises(NoSuchLabelError): + onto.ATotallyNewPattern + + updated_onto, _, _ = create_ontology_from_excel( + update_xlspath, force=True, input_ontology=ontology + ) + assert updated_onto.ATotallyNewPattern + assert updated_onto.Pattern.iri == onto.Pattern.iri + assert len(list(onto.classes())) + 1 == len(list(updated_onto.classes())) diff --git a/tests/tools/test_excel2onto.py b/tests/tools/test_excel2onto.py new file mode 100644 index 000000000..59505d7b7 --- /dev/null +++ b/tests/tools/test_excel2onto.py @@ -0,0 +1,39 @@ +"""Test the `ontograph` tool.""" +from pathlib import Path +import os +import pytest + + +@pytest.mark.parametrize("tool", ["excel2onto"], indirect=True) +def test_run(tool, tmpdir: Path) -> None: + """Check that running `excel2onto` works.""" + test_file = ( + Path(__file__).resolve().parent.parent + / "test_excelparser" + / "onto.xlsx" + ) + test_file2 = ( + Path(__file__).resolve().parent.parent + / "test_excelparser" + / "onto_update.xlsx" + ) + + tool.main([f"--output={str(tmpdir)}/onto.ttl", "--force", str(test_file)]) + + tool.main( + [ + f"--output={str(tmpdir)}/onto.ttl", + "--force", + "--input_ontology=newonto.ttl", + str(test_file2), + ] + ) + + tool.main( + [ + f"--output={str(tmpdir)}/ontology.ttl", + "--force", + "--update=False", + str(test_file), + ] + ) diff --git a/tools/excel2onto b/tools/excel2onto index 0e00f962e..525fae061 100755 --- a/tools/excel2onto +++ b/tools/excel2onto @@ -7,8 +7,10 @@ ontology_template.xlsx import argparse import sys import os +import warnings from ontopy.excelparser import create_ontology_from_excel, ExcelError from ontopy.utils import write_catalog +from ontopy import get_ontology import owlready2 # pylint: disable=C0411 @@ -17,8 +19,16 @@ def english(string): return owlready2.locstr(string, lang="en") -def main(): - """Main run function.""" +def main(argv: list = None): + """Main run function. + + Parameters: + argv: List of arguments, similar to `sys.argv[1:]`. + Mainly for testing purposes, since it allows one to invoke the tool + manually / through Python. + + """ + parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "excelpath", @@ -37,21 +47,60 @@ def main(): help="Whether to force generation of ontology on non-fatal error.", ) + parser.add_argument( + "--update", + "-u", + default=True, + help="Whether to update the the ontology with new concepts " + "or regenerate the full ontology." + "Currently only supports adding new concepts" + "Default is True.", + ) + + parser.add_argument( + "--input_ontology", + "-i", + default=None, + help="Path of previously generated ontology to update with" + " new concepts." + "Deafult is the same as 'output'.", + ) + try: - args = parser.parse_args() + args = parser.parse_args(args=argv) except SystemExit as exc: sys.exit(exc.code) # Exit without traceback on invalid arguments + if args.update: + try: + if args.input_ontology: + input_ontology = get_ontology(args.input_ontology).load() + else: + input_ontology = get_ontology(args.output).load() + except FileNotFoundError as err: + if args.force: + warnings.warn( + "Did not find the input ontology, " + "will fully generate a new one." + ) + input_ontology = None + else: + raise err + try: ontology, catalog, _ = create_ontology_from_excel( - args.excelpath, force=args.force + os.path.abspath(args.excelpath), + force=args.force, + input_ontology=input_ontology, ) except ExcelError as exc: parser.exit(1, f"ERROR: {exc}\n") # Save new ontology as turtle ontology.save(os.path.join(args.output), format="turtle", overwrite=True) - write_catalog(catalog) + dirname = os.path.dirname(args.output) + if (not args.update) or (not os.path.exists(dirname + "/catalog-v001.xml")): + write_catalog(catalog, directory=dirname) if __name__ == "__main__":