def _relative_to_absolute_paths(path, base_dir=None):
    """Resolve a relative import location against the workbook directory.

    Arguments:
        path: One cell value from the "Imported ontologies" column.
            Anything that is not a ``str`` (for instance the NaN that
            marks an empty cell) is returned unchanged.
        base_dir: Directory that relative paths are resolved against.
            Defaults to the directory part of ``excelpath``, which is
            looked up in the enclosing scope.

    Returns:
        `path` unchanged if it is not a string, is a web URL
        (``http://``, ``https://`` or ``ftp://``) or is already an
        absolute file path; otherwise `path` prefixed with `base_dir`.
    """
    web_protocol = ("http://", "https://", "ftp://")

    # Non-string cells and remote locations are passed through untouched.
    if not isinstance(path, str) or path.startswith(web_protocol):
        return path

    # An absolute path must not be re-rooted under the workbook directory.
    if os.path.isabs(path):
        return path

    if base_dir is None:
        base_dir = os.path.dirname(excelpath)

    # With an empty directory part (workbook referenced by bare file
    # name) a naive `dir + "/" + path` would produce "/path", i.e. a
    # location at the filesystem root.  Keep the path relative instead.
    if not base_dir:
        return path

    # Join with "/" (not os.sep) so that code splitting the location on
    # "/" to find its directory keeps working on every platform.
    return f"{base_dir}/{path}"
pylint:disable=too-many-locals,too-many-branches,too-many-statements,too-many-arguments data: pd.DataFrame, metadata: pd.DataFrame, - imports: list, + imports: pd.DataFrame, base_iri: str = "http://emmo.info/emmo/domain/onto#", base_iri_from_metadata: bool = True, catalog: dict = None, @@ -332,7 +346,6 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran all_added_rows.extend(added_rows) # Add properties in a second loop - for index in all_added_rows: row = data.loc[index] properties = row["Relations"] @@ -379,6 +392,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran concepts_with_errors["in_imported_ontologies"] = concepts_with_errors[ "already_defined" ].intersection(imported_concepts) + return onto, catalog, concepts_with_errors @@ -386,7 +400,7 @@ def get_metadata_from_dataframe( # pylint: disable=too-many-locals,too-many-bra metadata: pd.DataFrame, base_iri: str, base_iri_from_metadata: bool = True, - imports: Sequence = (), + imports: pd.DataFrame = None, catalog: dict = None, ) -> Tuple[ontopy.ontology.Ontology, dict]: """Create ontology with metadata from pd.DataFrame""" @@ -409,12 +423,29 @@ def get_metadata_from_dataframe( # pylint: disable=too-many-locals,too-many-bra # Add imported ontologies catalog = {} if catalog is None else catalog locations = set() - for location in imports: + for _, row in imports.iterrows(): + # for location in imports: + location = row["Imported ontologies"] if not pd.isna(location) and location not in locations: imported = onto.world.get_ontology(location).load() onto.imported_ontologies.append(imported) catalog[imported.base_iri.rstrip("#/")] = location + try: + cat = read_catalog(location.rsplit("/", 1)[0]) + catalog.update(cat) + except ReadCatalogError: + warnings.warn(f"Catalog for {imported} not found.") locations.add(location) + # set defined prefix + if not pd.isna(row["prefix"]): + # set prefix for all ontologies with same 'base_iri_root' + if not 
pd.isna(row["base_iri_root"]): + onto.set_common_prefix( + iri_base=row["base_iri_root"], prefix=row["prefix"] + ) + # If base_root not given, set prefix only to top ontology + else: + imported.prefix = row["prefix"] with onto: # Add title diff --git a/ontopy/ontology.py b/ontopy/ontology.py index bf4a6c9c6..249b8db50 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -15,7 +15,7 @@ from pathlib import Path from collections import defaultdict from collections.abc import Iterable -from urllib.request import HTTPError +from urllib.request import HTTPError, URLError import rdflib from rdflib.util import guess_format @@ -40,6 +40,7 @@ _validate_installed_version, LabelDefinitionError, ThingClassDefinitionError, + EMMOntoPyException, ) if TYPE_CHECKING: @@ -617,7 +618,13 @@ def getmtime(path): if fmt and fmt not in OWLREADY2_FORMATS: # Convert filename to rdfxml before passing it to owlready2 graph = rdflib.Graph() - graph.parse(resolved_url, format=fmt) + try: + graph.parse(resolved_url, format=fmt) + except URLError as err: + raise EMMOntoPyException( + "URL error", err, resolved_url + ) from err + with tempfile.NamedTemporaryFile() as handle: graph.serialize(destination=handle, format="xml") handle.seek(0) diff --git a/tests/testonto/excelparser/catalog-v001.xml b/tests/test_excelparser/catalog-v001.xml similarity index 65% rename from tests/testonto/excelparser/catalog-v001.xml rename to tests/test_excelparser/catalog-v001.xml index 597de9fa0..698ab79a1 100644 --- a/tests/testonto/excelparser/catalog-v001.xml +++ b/tests/test_excelparser/catalog-v001.xml @@ -2,5 +2,8 @@ + + + diff --git a/tests/testonto/excelparser/fromexcelonto.ttl b/tests/test_excelparser/fromexcelonto.ttl similarity index 92% rename from tests/testonto/excelparser/fromexcelonto.ttl rename to tests/test_excelparser/fromexcelonto.ttl index 3811a3396..304994288 100644 --- a/tests/testonto/excelparser/fromexcelonto.ttl +++ b/tests/test_excelparser/fromexcelonto.ttl @@ -12,7 +12,8 @@ "Jesper 
Friis"@en, "Sylvain Gouttebroze"@en ; term:title "A test domain ontology"@en ; - owl:imports ; + owl:imports , + ; owl:versionInfo "0.01"@en . :EMMO_0264be35-e8ad-5b35-a1a3-84b37bde22d1 a owl:Class ; @@ -25,6 +26,10 @@ :EMMO_4b32833e-0833-56a7-903c-28a6a8191fe8 ; core:prefLabel "FiniteTemporalPattern"@en . +:EMMO_080262b7-4f7e-582b-916e-8274c73dd629 a owl:Class ; + rdfs:subClassOf ; + core:prefLabel "ANewTestClass"@en . + :EMMO_1c81f1eb-8b94-5e74-96de-1aeacbdb5b93 a owl:Class ; emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "The boundary of a grain"@en ; rdfs:subClassOf :EMMO_472ed27e-ce08-53cb-8453-56ab363275c4 ; @@ -58,6 +63,10 @@ :EMMO_9fa9ca88-2891-538a-a8dd-ccb8a08b9890 ; core:prefLabel "FiniteSpatioTemporalPattern"@en . +:EMMO_e4e653eb-72cd-5dd6-a428-f506d9679774 a owl:Class ; + rdfs:subClassOf ; + core:prefLabel "AnotherNewTestClass"@en . + :EMMO_e633d033-2af6-5f04-a706-dab826854fb1 a owl:Class ; emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "The boundary of a subgrain"@en ; rdfs:subClassOf owl:Thing ; diff --git a/tests/test_excelparser/imported_onto/catalog-v001.xml b/tests/test_excelparser/imported_onto/catalog-v001.xml new file mode 100644 index 000000000..f97363ba2 --- /dev/null +++ b/tests/test_excelparser/imported_onto/catalog-v001.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/tests/test_excelparser/imported_onto/ontology.ttl b/tests/test_excelparser/imported_onto/ontology.ttl new file mode 100644 index 000000000..1f16f8bf7 --- /dev/null +++ b/tests/test_excelparser/imported_onto/ontology.ttl @@ -0,0 +1,18 @@ +@prefix : . +@prefix owl: . +@prefix rdf: . +@prefix xml: . +@prefix xsd: . +@prefix rdfs: . +@prefix skos: . +@base . + + rdf:type owl:Ontology ; + owl:versionIRI ; + owl:imports ; + owl:versionInfo "0.1.0" . + + +:testclass rdf:type owl:Class ; + rdfs:subClassOf owl:Thing ; + skos:prefLabel "TestClass"@en . 
diff --git a/tests/test_excelparser/imported_onto/subontology.ttl b/tests/test_excelparser/imported_onto/subontology.ttl new file mode 100644 index 000000000..023a198d3 --- /dev/null +++ b/tests/test_excelparser/imported_onto/subontology.ttl @@ -0,0 +1,21 @@ +@prefix : . +@prefix owl: . +@prefix rdf: . +@prefix xml: . +@prefix xsd: . +@prefix rdfs: . +@prefix skos: . +@base . + + rdf:type owl:Ontology ; + owl:versionIRI . + + +# Annotations +skos:prefLabel rdf:type owl:AnnotationProperty . +skos:altLabel rdf:type owl:AnnotationProperty . + + +:testclass2 rdf:type owl:Class ; + rdfs:subClassOf owl:Thing ; + skos:prefLabel "TestClass2"@en . diff --git a/tests/test_excelparser/onto.xlsx b/tests/test_excelparser/onto.xlsx new file mode 100755 index 000000000..10bbfc527 Binary files /dev/null and b/tests/test_excelparser/onto.xlsx differ diff --git a/tests/test_excelparser.py b/tests/test_excelparser/test_excelparser.py similarity index 84% rename from tests/test_excelparser.py rename to tests/test_excelparser/test_excelparser.py index 48ac5bce2..00e526a2a 100644 --- a/tests/test_excelparser.py +++ b/tests/test_excelparser/test_excelparser.py @@ -10,12 +10,10 @@ def test_excelparser(repo_dir: "Path") -> None: """Basic test for creating an ontology from an Excel file.""" - ontopath = ( - repo_dir / "tests" / "testonto" / "excelparser" / "fromexcelonto.ttl" - ) + ontopath = repo_dir / "tests" / "test_excelparser" / "fromexcelonto.ttl" onto = get_ontology(str(ontopath)).load() - xlspath = repo_dir / "tests" / "testonto" / "excelparser" / "onto.xlsx" + xlspath = repo_dir / "tests" / "test_excelparser" / "onto.xlsx" ontology, catalog, errors = create_ontology_from_excel(xlspath, force=True) assert onto == ontology diff --git a/tests/test_load.py b/tests/test_load.py index b9d5e0c5b..e549b0e0e 100755 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -8,7 +8,7 @@ def test_load(repo_dir: "Path", testonto: "Ontology") -> None: import pytest from ontopy import get_ontology - 
from ontopy.ontology import HTTPError + from ontopy.ontology import EMMOntoPyException # Check that the defaults works emmo = get_ontology("emmo").load() # ttl format @@ -36,8 +36,9 @@ def test_load(repo_dir: "Path", testonto: "Ontology") -> None: assert onto.Electrolyte.prefLabel.first() == "Electrolyte" with pytest.raises( - HTTPError, - match="HTTP Error 404: https://emmo.info/non-existing/ontology: Not Found", + EMMOntoPyException, + match="'URL error', , 'http://emmo.info/non-existing/ontology'" + # match="HTTP Error 404: https://emmo.info/non-existing/ontology: Not Found", ): get_ontology("http://emmo.info/non-existing/ontology#").load() diff --git a/tests/testonto/catalog-v001.xml b/tests/testonto/catalog-v001.xml index 926f218ff..8299094ce 100644 --- a/tests/testonto/catalog-v001.xml +++ b/tests/testonto/catalog-v001.xml @@ -1,7 +1,7 @@ - - + + diff --git a/tests/testonto/excelparser/onto.xlsx b/tests/testonto/excelparser/onto.xlsx deleted file mode 100755 index 96bf4e7b8..000000000 Binary files a/tests/testonto/excelparser/onto.xlsx and /dev/null differ