diff --git a/docs/api_reference/ontopy/excelparser.md b/docs/api_reference/ontopy/excelparser.md
new file mode 100644
index 000000000..9883971c0
--- /dev/null
+++ b/docs/api_reference/ontopy/excelparser.md
@@ -0,0 +1,3 @@
+# excelparser
+
+::: ontopy.excelparser
diff --git a/examples/ontology-from-excel/make_microstructure_onto.py b/examples/ontology-from-excel/make_microstructure_onto.py
new file mode 100755
index 000000000..418d812b7
--- /dev/null
+++ b/examples/ontology-from-excel/make_microstructure_onto.py
@@ -0,0 +1,10 @@
+"""
+python example for creating ontology from excel
+"""
+from ontopy.excelparser import create_ontology_from_excel
+from ontopy.utils import write_catalog
+
+ontology, catalog = create_ontology_from_excel("tool/onto.xlsx")
+
+ontology.save("microstructure_ontology.ttl", format="turtle", overwrite=True)
+write_catalog(catalog)
diff --git a/examples/ontology-from-excel/tool/onto.xlsx b/examples/ontology-from-excel/tool/onto.xlsx
new file mode 100755
index 000000000..01ad4a91d
Binary files /dev/null and b/examples/ontology-from-excel/tool/onto.xlsx differ
diff --git a/ontopy/excelparser.py b/ontopy/excelparser.py
new file mode 100755
index 000000000..db09a790e
--- /dev/null
+++ b/ontopy/excelparser.py
@@ -0,0 +1,331 @@
+"""
+Module for parsing an excelfile and creating an
+ontology from it.
+
+The excelfile is read by pandas and the pandas
+dataframe should have column names:
+prefLabel, altLabel, Elucidation, Comments, Examples,
+subClassOf, Relations.
+
+Note that correct case is mandatory.
+"""
+import warnings
+from typing import Tuple, Union
+import pyparsing
+import pandas as pd
+import ontopy
+from ontopy import World, get_ontology
+from ontopy.utils import NoSuchLabelError
+from ontopy.manchester import evaluate
+import owlready2  # pylint: disable=C0411
+
+
+def english(string):
+    """Returns `string` as an English location string."""
+    return owlready2.locstr(string, lang="en")
+
+
+def create_ontology_from_excel(  # pylint: disable=too-many-arguments
+    excelpath: str,
+    concept_sheet_name: str = "Concepts",
+    metadata_sheet_name: str = "Metadata",
+    base_iri: str = "http://emmo.info/emmo/domain/onto#",
+    base_iri_from_metadata: bool = True,
+    catalog: dict = None,
+) -> Tuple[ontopy.ontology.Ontology, dict]:
+    """
+    Creates an ontology from an excelfile.
+
+    Arguments:
+        excelpath: Path to the excel workbook.
+        concept_sheet_name: Name of the sheet with the concepts.
+        metadata_sheet_name: Name of the sheet with the metadata.
+        base_iri: Base IRI of the new ontology.
+        base_iri_from_metadata: Whether the base IRI given in the
+            metadata sheet takes precedence over `base_iri`.
+        catalog: Dict of imported ontologies with key name and value
+            path.
+
+    Returns:
+        A tuple with the created ontology and the catalog.
+    """
+    # Read datafile TODO: Some magic to identify the header row
+    # For now sheet rows 0 and 2 are skipped, which leaves row 1 as header.
+    conceptdata = pd.read_excel(
+        excelpath, sheet_name=concept_sheet_name, skiprows=[0, 2]
+    )
+    metadata = pd.read_excel(excelpath, sheet_name=metadata_sheet_name)
+    return create_ontology_from_pandas(
+        conceptdata, metadata, base_iri, base_iri_from_metadata, catalog
+    )
+
+
+def create_ontology_from_pandas(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
+    data: pd.DataFrame,
+    metadata: pd.DataFrame,
+    base_iri: str = "http://emmo.info/emmo/domain/onto#",
+    base_iri_from_metadata: bool = True,
+    catalog: dict = None,
+) -> Tuple[ontopy.ontology.Ontology, dict]:
+    """
+    Create an ontology from a pandas DataFrame.
+
+    Arguments:
+        data: Concepts, one per row, with the columns listed in the
+            module documentation.  Rows without prefLabel are ignored.
+        metadata: Metadata with the columns "Metadata name" and "Value".
+        base_iri: Base IRI of the new ontology.
+        base_iri_from_metadata: Whether the base IRI given in the
+            metadata takes precedence over `base_iri`.
+        catalog: Dict of imported ontologies with key name and value
+            path.  It is updated with the ontologies imported by the
+            metadata.
+
+    Returns:
+        A tuple with the created ontology and the catalog.
+    """
+
+    # Remove Concepts without prefLabel and make all to string
+    data = data[data["prefLabel"].notna()]
+    data = data.astype({"prefLabel": "str"})
+
+    # Make new ontology
+    world = World()
+    onto = world.get_ontology(base_iri)
+
+    # Pass `catalog` on, such that a catalog provided by the caller is
+    # updated instead of being replaced by a new dict
+    onto, catalog = get_metadata_from_dataframe(
+        metadata, onto, catalog=catalog
+    )
+
+    # base_iri from metadata if it exists and base_iri_from_metadata
+    if not base_iri_from_metadata:
+        onto.base_iri = base_iri
+
+    onto.sync_python_names()
+    with onto:
+        # Loop through the rows until no more are added.  A final loop
+        # then adds concepts with missing parents below owl:Thing.
+        final_loop = False
+        while True:
+            number_of_added_classes = 0
+            for _, row in data.iterrows():
+                name = row["prefLabel"]
+                try:
+                    if isinstance(
+                        onto.get_by_label(name), owlready2.ThingClass
+                    ):
+                        continue
+                except NoSuchLabelError:
+                    pass
+
+                parent_names = str(row["subClassOf"]).split(";")
+
+                try:
+                    parents = [onto.get_by_label(pn) for pn in parent_names]
+                except NoSuchLabelError:
+                    if not final_loop:
+                        # The parent may be added by a later row or loop
+                        continue
+                    parents = owlready2.Thing
+                    warnings.warn(
+                        "Missing at least one of the defined parents. "
+                        f"Concept: {name}; Defined parents: {parent_names}"
+                    )
+
+                concept = onto.new_entity(name, parents)
+                # Add elucidation
+                _add_literal(
+                    row, concept.elucidation, "Elucidation", only_one=True
+                )
+
+                # Add examples
+                _add_literal(row, concept.example, "Examples")
+
+                # Add comments
+                _add_literal(row, concept.comment, "Comments")
+
+                # Add altLabels
+                _add_literal(row, concept.altLabel, "altLabel")
+
+                number_of_added_classes += 1
+
+            # Always stop after the final loop.  Stopping only when a parent
+            # is missing would loop forever if all parents were found.
+            if final_loop:
+                break
+            if number_of_added_classes == 0:
+                final_loop = True
+
+        # Add properties in a second loop
+        for _, row in data.iterrows():
+            properties = row["Relations"]
+            if not isinstance(properties, str):
+                continue
+            try:
+                concept = onto.get_by_label(row["prefLabel"])
+            except NoSuchLabelError:
+                # Skip the row, otherwise the relations would be added to
+                # the concept of a previous row
+                warnings.warn(
+                    f"Relations not added; no concept: {row['prefLabel']}"
+                )
+                continue
+            for prop in properties.split(";"):
+                try:
+                    concept.is_a.append(evaluate(onto, prop))
+                except pyparsing.ParseException as err:
+                    warnings.warn(
+                        f"Error in Property assignment for: {concept}. "
+                        f"Property to be Evaluated: {prop}. "
+                        f"Error is {err}."
+                    )
+
+    # Synchronise Python attributes to ontology
+    onto.sync_attributes(
+        name_policy="uuid", name_prefix="EMMO_", class_docstring="elucidation"
+    )
+    onto.dir_label = False
+    return onto, catalog
+
+
+def get_metadata_from_dataframe(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
+    metadata: pd.DataFrame,
+    onto: owlready2.Ontology = None,
+    base_iri_from_metadata: bool = True,
+    catalog: dict = None,
+) -> Tuple[ontopy.ontology.Ontology, dict]:
+    """
+    Populate ontology with metadata from pd.DataFrame
+
+    Arguments:
+        metadata: Metadata with the columns "Metadata name" and "Value".
+        onto: Ontology to populate.  A new ontology is created if it
+            is None.
+        base_iri_from_metadata: Whether to set the base IRI of `onto`
+            to the first "Ontology IRI" of the metadata, followed by "#".
+        catalog: Dict of imported ontologies with key name and value
+            path.  A new dict is created if it is None.
+
+    Returns:
+        A tuple with the populated ontology and the catalog.
+    """
+
+    if onto is None:
+        onto = get_ontology()
+
+    # base_iri from metadata if it exists and base_iri_from_metadata
+    if base_iri_from_metadata:
+        try:
+            base_iris = _parse_literal(metadata, "Ontology IRI", metadata=True)
+            if len(base_iris) > 1:
+                warnings.warn(
+                    "More than one Ontology IRI given. The first was chosen."
+                )
+            base_iri = base_iris[0] + "#"
+            onto.base_iri = base_iri
+        except (TypeError, ValueError, AttributeError):
+            pass
+
+    # Get imported ontologies from metadata
+    try:
+        imported_ontology_paths = _parse_literal(
+            metadata,
+            "Imported ontologies",
+            metadata=True,
+        )
+    except (TypeError, ValueError, AttributeError):
+        imported_ontology_paths = []
+    # Add imported ontologies
+    catalog = {} if catalog is None else catalog
+    for path in imported_ontology_paths:
+        imported = onto.world.get_ontology(path).load()
+        onto.imported_ontologies.append(imported)
+        catalog[imported.base_iri.rstrip("/")] = path
+
+    with onto:
+        # Add title
+        _add_literal(
+            metadata, onto.metadata.title, "Title", metadata=True, only_one=True
+        )
+
+        # Add license
+        _add_literal(metadata, onto.metadata.license, "License", metadata=True)
+
+        # Add authors onto.metadata.author does not work!
+        _add_literal(metadata, onto.metadata.creator, "Author", metadata=True)
+
+        # Add contributors
+        _add_literal(
+            metadata, onto.metadata.contributor, "Contributor", metadata=True
+        )
+
+        # Add versionInfo
+        _add_literal(
+            metadata,
+            onto.metadata.versionInfo,
+            "Ontology version Info",
+            metadata=True,
+            only_one=True,
+        )
+
+    return onto, catalog
+
+
+def _parse_literal(
+    data: Union[pd.DataFrame, pd.Series],
+    name: str,
+    metadata: bool = False,
+    sep: str = ";",
+) -> list:
+    """Helper function to make list of strings from ';'-delimited
+    strings in one string.
+
+    Arguments:
+        data: Metadata dataframe or a row of the concept dataframe.
+        name: Metadata name to look up in `data` if `metadata` is
+            true, otherwise name of the column to read.
+        metadata: Whether `data` is the metadata dataframe.
+        sep: Delimiter that the value is split on.
+
+    Raises:
+        ValueError: If no value is given for `name`.
+    """
+
+    if metadata is True:
+        values = data.loc[data["Metadata name"] == name]["Value"].item()
+    else:
+        values = data[name]
+    # A missing value is NaN, not a string.  Report it explicitly instead
+    # of relying on the AttributeError from calling split() on a float.
+    if pd.isna(values):
+        raise ValueError(f"No value given for {name}")
+    return str(values).split(sep)
+
+
+def _add_literal(  # pylint: disable=too-many-arguments
+    data: Union[pd.DataFrame, pd.Series],
+    destination: owlready2.prop.IndividualValueList,  #
+    name: str,
+    metadata: bool = False,
+    only_one: bool = False,
+    sep: str = ";",
+) -> None:
+    """Helper function that adds the values of `name` in `data` to
+    `destination` as English strings.
+
+    If `only_one` is true, only the first value is added.  A warning
+    is issued, and nothing is added, if the values cannot be parsed.
+    """
+    try:
+        name_list = _parse_literal(data, name, metadata=metadata, sep=sep)
+        if only_one is True and len(name_list) > 1:
+            warnings.warn(
+                f"More than one {name} is given. The first was chosen."
+            )
+            destination.append(english(name_list[0]))
+        else:
+            destination.extend([english(nm) for nm in name_list])
+    except (TypeError, ValueError, AttributeError):
+        warnings.warn(f"No {name} added.")
diff --git a/requirements.txt b/requirements.txt
index 35acf5858..b7358e668 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,8 +2,10 @@ blessings>=1.7,<2
 Cython>=0.29.21,<0.30
 defusedxml>=0.7.1,<1
 graphviz>=0.16,<0.20
+openpyxl>=3.0.9,<3.1
 Owlready2>=0.28,<0.36,!=0.32,!=0.34
 packaging>=21.0<22
+pandas>=1.2,<1.4
 pydot>=1.4.1,<2
 Pygments>=2.7.4,<3
 pyparsing>=2.4.7
diff --git a/setup.py b/setup.py
index 228fe9b05..34b10fc30 100644
--- a/setup.py
+++ b/setup.py
@@ -108,6 +108,7 @@ def fglob(patt):
         "tools/emmocheck",
         "tools/ontoconvert",
         "tools/ontoversion",
+        "tools/excel2onto",
     ],
     package_data={
         "ontopy.factpluspluswrapper.java.lib.so": ["*"],
diff --git a/tests/test_excelparser.py b/tests/test_excelparser.py
new file mode 100644
index 000000000..22d871e9a
--- /dev/null
+++ b/tests/test_excelparser.py
@@ -0,0 +1,23 @@
+"""Test the excelparser module."""
+from pathlib import Path
+
+from ontopy import get_ontology
+from ontopy.excelparser import create_ontology_from_excel
+from ontopy.utils import write_catalog
+
+
+def test_excelparser(repo_dir: "Path") -> None:
+    """Basic test for creating an ontology from an excel file."""
+    ontopath = (
+        repo_dir / "tests" / "testonto" / "excelparser" / "fromexcelonto.ttl"
+    )
+    onto = get_ontology(str(ontopath)).load()
+    xlspath = repo_dir / "tests" / "testonto" / "excelparser" / "onto.xlsx"
+    ontology, catalog = create_ontology_from_excel(xlspath)
+    assert onto == ontology
+
+
+if __name__ == "__main__":
+    # When run as a script nothing supplies `repo_dir`.  This file is
+    # tests/test_excelparser.py, so the repository root is two levels up.
+    test_excelparser(Path(__file__).resolve().parent.parent)
diff --git a/tests/testonto/excelparser/catalog-v001.xml b/tests/testonto/excelparser/catalog-v001.xml
new file mode 100644
index 000000000..597de9fa0
--- /dev/null
+++ b/tests/testonto/excelparser/catalog-v001.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/tests/testonto/excelparser/fromexcelonto.ttl b/tests/testonto/excelparser/fromexcelonto.ttl
new file mode 100644
index 000000000..f3434c173
--- /dev/null
+++ b/tests/testonto/excelparser/fromexcelonto.ttl
@@ -0,0 +1,116 @@
+@prefix : .
+@prefix core: .
+@prefix emmo: .
+@prefix owl: .
+@prefix rdfs: .
+@prefix term: .
+
+ a owl:Ontology ;
+    term:creator "Astrid Marthinsen"@en,
+        "Georg Schmidt"@en,
+        "Jesper Friis"@en,
+        "Sylvain Gouttebroze"@en,
+        "Tomas Manik"@en,
+        "Ulrike Cihak-Bayr"@en ;
+    term:title "Microstructure ontology based on EMMO - Top concepts"@en ;
+    owl:imports ;
+    owl:versionInfo "0.01"@en .
+
+:EMMO_0264be35-e8ad-5b35-a1a3-84b37bde22d1 a owl:Class ;
+    emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Temporal pattern occurring in a time interval"@en ;
+    emmo:EMMO_b432d2d5_25f4_4165_99c5_5935a7763c1a "Light house during one night"@en ;
+    rdfs:subClassOf [ a owl:Restriction ;
+            owl:onProperty emmo:EMMO_17e27c22_37e1_468c_9dd7_95e137f73e7f ;
+            owl:someValuesFrom :EMMO_b41c9cb3-3b2d-509f-9c93-aa04da134307 ],
+        [ a owl:Restriction ;
+            owl:onProperty emmo:EMMO_e1097637_70d2_4895_973f_2396f04fa204 ;
+            owl:someValuesFrom emmo:EMMO_d4f7d378_5e3b_468a_baa1_a7e98358cda7 ],
+        :EMMO_138590b8-3333-515d-87ab-717aac8434e6,
+        :EMMO_4b32833e-0833-56a7-903c-28a6a8191fe8 ;
+    core:prefLabel "FiniteTemporalPattern"@en .
+
+:EMMO_70269d17-fbaa-54a6-8905-ce4dee45e0dd a owl:Class ;
+    rdfs:subClassOf owl:Thing ;
+    core:prefLabel "Particle"@en .
+ +:EMMO_76b2eb15-3ab7-52b3-ade2-755aa390d63e a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Spatial pattern localized in a volume of space"@en ; + emmo:EMMO_b432d2d5_25f4_4165_99c5_5935a7763c1a "Textured surface after etching"@en ; + rdfs:subClassOf [ a owl:Restriction ; + owl:onProperty emmo:EMMO_e1097637_70d2_4895_973f_2396f04fa204 ; + owl:someValuesFrom emmo:EMMO_f1a51559_aa3d_43a0_9327_918039f0dfed ], + [ a owl:Restriction ; + owl:onProperty emmo:EMMO_17e27c22_37e1_468c_9dd7_95e137f73e7f ; + owl:someValuesFrom :EMMO_472ed27e-ce08-53cb-8453-56ab363275c4 ], + :EMMO_4b32833e-0833-56a7-903c-28a6a8191fe8, + :EMMO_5f50f77e-f321-53e3-af76-fe5b0a347479 ; + core:prefLabel "FiniteSpatialPattern"@en . + +:EMMO_903bf818-c0b4-56ef-9673-799ba204795d a owl:Class ; + rdfs:subClassOf owl:Thing ; + core:prefLabel "Precipitate"@en . + +:EMMO_b0f0e57e-464d-562f-80ec-b216c92d5e88 a owl:Class ; + rdfs:subClassOf owl:Thing ; + core:prefLabel "Grain"@en . + +:EMMO_d35b8f2a-64c0-5f57-a569-308bc8f8a1c5 a owl:Class ; + rdfs:subClassOf owl:Thing ; + core:prefLabel "Phase"@en . + +:EMMO_e0b20a22-7e6f-5c81-beca-35bc5358e11b a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "NEED elucidation"@en ; + rdfs:subClassOf :EMMO_4b32833e-0833-56a7-903c-28a6a8191fe8, + :EMMO_9fa9ca88-2891-538a-a8dd-ccb8a08b9890 ; + core:prefLabel "FiniteSpatioTemporalPattern"@en . + +:EMMO_138590b8-3333-515d-87ab-717aac8434e6 a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Pattern with only temporal aspect"@en ; + emmo:EMMO_b432d2d5_25f4_4165_99c5_5935a7763c1a "Voltage in AC plug"@en ; + rdfs:subClassOf :EMMO_9fa9ca88-2891-538a-a8dd-ccb8a08b9890 ; + core:prefLabel "TemporalPattern"@en . + +:EMMO_472ed27e-ce08-53cb-8453-56ab363275c4 a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "NEED elucidation"@en ; + rdfs:subClassOf :EMMO_1b2bfe71-5da9-5c46-b137-be45c3e3f9c3 ; + core:prefLabel "SpatialBoundary"@en . 
+ +:EMMO_5f50f77e-f321-53e3-af76-fe5b0a347479 a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Spatial pattern without regular temporal variations"@en ; + emmo:EMMO_b432d2d5_25f4_4165_99c5_5935a7763c1a "Infinite grid"@en ; + rdfs:subClassOf :EMMO_9fa9ca88-2891-538a-a8dd-ccb8a08b9890 ; + core:prefLabel "SpatialPattern"@en . + +:EMMO_b41c9cb3-3b2d-509f-9c93-aa04da134307 a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "NEED elucidation"@en ; + rdfs:subClassOf :EMMO_1b2bfe71-5da9-5c46-b137-be45c3e3f9c3 ; + core:prefLabel "TemporalBoundary"@en . + +:EMMO_cd254842-c697-55f6-917d-9805c77b9187 a owl:Class ; + emmo:EMMO_21ae69b4_235e_479d_8dd8_4f756f694c1b "A"@en, + "Just"@en, + "Test"@en ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "everything that can be perceived or measured"@en ; + rdfs:comment " this definition is much broader than definition of pattern such as \"the regular and repeated way in which something happens or is\""@en, + "a pattern is defined from a contrast"@en ; + rdfs:subClassOf emmo:EMMO_649bf97b_4397_4005_90d9_219755d92e34 ; + core:prefLabel "Pattern"@en . + +:EMMO_1b2bfe71-5da9-5c46-b137-be45c3e3f9c3 a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "NEED elucidation"@en ; + rdfs:subClassOf emmo:EMMO_649bf97b_4397_4005_90d9_219755d92e34 ; + core:prefLabel "Boundary"@en . + +:EMMO_4b32833e-0833-56a7-903c-28a6a8191fe8 a owl:Class ; + emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "Pattern occuring within a boundary in the 4D space"@en ; + rdfs:comment "Every physical patterns are FinitePattern"@en ; + rdfs:subClassOf [ a owl:Restriction ; + owl:onProperty emmo:EMMO_17e27c22_37e1_468c_9dd7_95e137f73e7f ; + owl:someValuesFrom :EMMO_1b2bfe71-5da9-5c46-b137-be45c3e3f9c3 ], + :EMMO_cd254842-c697-55f6-917d-9805c77b9187 ; + core:prefLabel "FinitePattern"@en . 
+
+:EMMO_9fa9ca88-2891-538a-a8dd-ccb8a08b9890 a owl:Class ;
+    emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "NEED elucidation"@en ;
+    rdfs:subClassOf :EMMO_cd254842-c697-55f6-917d-9805c77b9187 ;
+    core:prefLabel "SpatioTemporalPattern"@en .
diff --git a/tests/testonto/excelparser/onto.xlsx b/tests/testonto/excelparser/onto.xlsx
new file mode 100755
index 000000000..01ad4a91d
Binary files /dev/null and b/tests/testonto/excelparser/onto.xlsx differ
diff --git a/tools/excel2onto b/tools/excel2onto
new file mode 100755
index 000000000..6aee6a601
--- /dev/null
+++ b/tools/excel2onto
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""Creates an ontology from an excelfile.
+
+The excel file must be in the format provided by
+ontology_template.xlsx
+"""
+import argparse
+import sys
+from ontopy.excelparser import create_ontology_from_excel
+from ontopy.utils import write_catalog
+
+
+def main():
+    """Main run function."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "excelpath",
+        help="path to excel book",
+    )
+    parser.add_argument(
+        "--output",
+        "-o",
+        default="ontology.ttl",
+        help="Name of output ontology, ´ontology.ttl´ is default",
+    )
+    try:
+        args = parser.parse_args()
+    except SystemExit as exc:
+        sys.exit(exc.code)  # Exit without traceback on invalid arguments
+
+    ontology, catalog = create_ontology_from_excel(args.excelpath)
+
+    # Save new ontology as turtle
+    ontology.save(args.output, format="turtle", overwrite=True)
+    write_catalog(catalog)
+
+
+if __name__ == "__main__":
+    main()