Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added possibility to update ontology. #527

Merged
merged 7 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions ontopy/excelparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments
imports: list = None,
catalog: dict = None,
force: bool = False,
input_ontology: Union[ontopy.ontology.Ontology, None] = None,
) -> Tuple[ontopy.ontology.Ontology, dict, dict]:
"""
Creates an ontology from an Excel-file.
Expand Down Expand Up @@ -72,6 +73,12 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments
catalog: Imported ontologies with (name, full path) key/value-pairs.
force: Forcibly make an ontology by skipping concepts
that are erroneously defined or other errors in the excel sheet.
input_ontology: Ontology that should be updated.
Default is None,
which means that a completely new ontology is generated.
If an input_ontology to be updated is provided,
the metadata sheet in the excel sheet will not be considered.


Returns:
A tuple with the:
Expand Down Expand Up @@ -139,6 +146,7 @@ def _relative_to_absolute_paths(path):
base_iri_from_metadata=base_iri_from_metadata,
catalog=catalog,
force=force,
input_ontology=input_ontology,
)


Expand All @@ -150,6 +158,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
base_iri_from_metadata: bool = True,
catalog: dict = None,
force: bool = False,
input_ontology: Union[ontopy.ontology.Ontology, None] = None,
) -> Tuple[ontopy.ontology.Ontology, dict]:
"""
Create an ontology from a pandas DataFrame.
Expand All @@ -166,15 +175,17 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
data = data[data["prefLabel"].str.len() > 0]
data.reset_index(drop=True, inplace=True)

# Make new ontology
onto, catalog = get_metadata_from_dataframe(
metadata, base_iri, imports=imports
)

# Set given or default base_iri if base_iri_from_metadata is False.
if not base_iri_from_metadata:
onto.base_iri = base_iri
if input_ontology:
onto = input_ontology
catalog = {}
else: # Create new ontology
onto, catalog = get_metadata_from_dataframe(
metadata, base_iri, imports=imports
)

# Set given or default base_iri if base_iri_from_metadata is False.
if not base_iri_from_metadata:
onto.base_iri = base_iri
labels = set(data["prefLabel"])
for altlabel in data["altLabel"].str.strip():
if not altlabel == "nan":
Expand All @@ -192,6 +203,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
}

onto.sync_python_names()

with onto:
remaining_rows = set(range(len(data)))
all_added_rows = []
Expand All @@ -202,7 +214,10 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
name = row["prefLabel"]
try:
onto.get_by_label(name)
if onto.world[onto.base_iri + name]:
if onto.base_iri in [
a.namespace.base_iri
for a in onto.get_by_label_all(name)
]:
if not force:
raise ExcelError(
f'Concept "{name}" already in ontology'
Expand All @@ -223,15 +238,13 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
continue
except NoSuchLabelError:
pass

if row["subClassOf"] == "nan":
if not force:
raise ExcelError(f"{row[0]} has no subClassOf")
parent_names = [] # Should be "owl:Thing"
concepts_with_errors["missing_parents"].append(name)
else:
parent_names = str(row["subClassOf"]).split(";")

parents = []
invalid_parent = False
for parent_name in parent_names:
Expand Down Expand Up @@ -383,7 +396,6 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
concepts_with_errors = {
key: set(value) for key, value in concepts_with_errors.items()
}

return onto, catalog, concepts_with_errors


Expand Down
2 changes: 0 additions & 2 deletions ontopy/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,6 @@ def _load( # pylint: disable=too-many-arguments,too-many-locals,too-many-branch
):
"""Help function for load()."""
web_protocol = "http://", "https://", "ftp://"

url = str(filename) if filename else self.base_iri.rstrip("/#")
if url.startswith(web_protocol):
baseurl = os.path.dirname(url)
Expand Down Expand Up @@ -594,7 +593,6 @@ def getmtime(path):
)
self.world._iri_mappings.update(iris)
resolved_url = self.world._iri_mappings.get(url, url)

# Append paths from catalog file to onto_path
for path in sorted(dirs, reverse=True):
if path not in owlready2.onto_path:
Expand Down
Binary file added tests/test_excelparser/onto_update.xlsx
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<group id="Folder Repository, directory=, recursive=true, Auto-Update=false, version=2" prefer="public" xml:base="">
<uri name="https://raw.githubusercontent.com/emmo-repo/emmo-repo.github.io/master/versions/1.0.0-beta/emmo-inferred-chemistry" uri="https://raw.githubusercontent.com/emmo-repo/emmo-repo.github.io/master/versions/1.0.0-beta/emmo-inferred-chemistry2.ttl"/>
<uri name="http://ontology.info/ontology" uri="imported_onto/ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0" uri="imported_onto/ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0/subontology" uri="imported_onto/subontology.ttl"/>
<uri name="http://ontology.info/ontology" uri="../imported_onto/ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0" uri="../imported_onto/ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0/subontology" uri="../imported_onto/subontology.ttl"/>
</group>
</catalog>
23 changes: 22 additions & 1 deletion tests/test_excelparser/test_excelparser.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
"""Test the Excel parser module."""
import pytest
from typing import TYPE_CHECKING

from ontopy import get_ontology
from ontopy.excelparser import create_ontology_from_excel
from ontopy.utils import NoSuchLabelError

if TYPE_CHECKING:
from pathlib import Path


def test_excelparser(repo_dir: "Path") -> None:
"""Basic test for creating an ontology from an Excel file."""
ontopath = repo_dir / "tests" / "test_excelparser" / "fromexcelonto.ttl"
ontopath = (
repo_dir
/ "tests"
/ "test_excelparser"
/ "result_ontology"
/ "fromexcelonto.ttl"
)

onto = get_ontology(str(ontopath)).load()
xlspath = repo_dir / "tests" / "test_excelparser" / "onto.xlsx"
update_xlspath = (
repo_dir / "tests" / "test_excelparser" / "onto_update.xlsx"
)
ontology, catalog, errors = create_ontology_from_excel(xlspath, force=True)

assert onto == ontology
Expand All @@ -33,3 +44,13 @@ def test_excelparser(repo_dir: "Path") -> None:
}

assert len(ontology.get_by_label_all("Atom")) == 2
onto_length = len(list(onto.get_entities()))
with pytest.raises(NoSuchLabelError):
onto.ATotallyNewPattern

updated_onto, _, _ = create_ontology_from_excel(
update_xlspath, force=True, input_ontology=ontology
)
assert updated_onto.ATotallyNewPattern
assert updated_onto.Pattern.iri == onto.Pattern.iri
assert len(list(onto.classes())) + 1 == len(list(updated_onto.classes()))
39 changes: 39 additions & 0 deletions tests/tools/test_excel2onto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Test the `excel2onto` tool."""
from pathlib import Path
import os
import pytest


@pytest.mark.parametrize("tool", ["excel2onto"], indirect=True)
def test_run(tool, tmpdir: Path) -> None:
    """Smoke-test the `excel2onto` command-line entry point.

    `tool.main` is invoked three times in sequence, always with `--force`:

    1. `onto.xlsx`, writing `onto.ttl` into the temporary directory.
    2. `onto_update.xlsx`, same output path, additionally passing
       `--input_ontology=newonto.ttl`.
    3. `onto.xlsx` again, writing `ontology.ttl` and passing
       `--update=False`.

    The test contains no assertions of its own; it passes as long as none
    of the invocations raises.

    Parameters:
        tool: Object exposing `main(argv)`; requested through the
            indirectly parametrized `tool` fixture (defined outside this
            file) with the value "excel2onto".
        tmpdir: Temporary directory in which the output files are placed.

    """
    # Both workbooks live in the sibling `test_excelparser` test directory.
    excel_dir = Path(__file__).resolve().parents[1] / "test_excelparser"
    base_workbook = excel_dir / "onto.xlsx"
    update_workbook = excel_dir / "onto_update.xlsx"
    out_dir = str(tmpdir)

    # Argument lists are run strictly in this order.
    invocations = (
        [f"--output={out_dir}/onto.ttl", "--force", str(base_workbook)],
        [
            f"--output={out_dir}/onto.ttl",
            "--force",
            "--input_ontology=newonto.ttl",
            str(update_workbook),
        ],
        [
            f"--output={out_dir}/ontology.ttl",
            "--force",
            "--update=False",
            str(base_workbook),
        ],
    )
    for cli_args in invocations:
        tool.main(cli_args)
59 changes: 54 additions & 5 deletions tools/excel2onto
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ ontology_template.xlsx
import argparse
import sys
import os
import warnings
from ontopy.excelparser import create_ontology_from_excel, ExcelError
from ontopy.utils import write_catalog
from ontopy import get_ontology
import owlready2 # pylint: disable=C0411


Expand All @@ -17,8 +19,16 @@ def english(string):
return owlready2.locstr(string, lang="en")


def main():
"""Main run function."""
def main(argv: list = None):
"""Main run function.

Parameters:
argv: List of arguments, similar to `sys.argv[1:]`.
Mainly for testing purposes, since it allows one to invoke the tool
manually / through Python.

"""

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"excelpath",
Expand All @@ -37,21 +47,60 @@ def main():
help="Whether to force generation of ontology on non-fatal error.",
)

parser.add_argument(
"--update",
"-u",
default=True,
help="Whether to update the the ontology with new concepts "
"or regenerate the full ontology."
"Currently only supports adding new concepts"
"Default is True.",
)

parser.add_argument(
"--input_ontology",
"-i",
default=None,
help="Path of previously generated ontology to update with"
" new concepts."
"Deafult is the same as 'output'.",
)

try:
args = parser.parse_args()
args = parser.parse_args(args=argv)
except SystemExit as exc:
sys.exit(exc.code) # Exit without traceback on invalid arguments

if args.update:
try:
if args.input_ontology:
input_ontology = get_ontology(args.input_ontology).load()
else:
input_ontology = get_ontology(args.output).load()
except FileNotFoundError as err:
if args.force:
warnings.warn(
"Did not find the input ontology, "
"will fully generate a new one."
)
input_ontology = None
else:
raise err

try:
ontology, catalog, _ = create_ontology_from_excel(
args.excelpath, force=args.force
os.path.abspath(args.excelpath),
force=args.force,
input_ontology=input_ontology,
)
except ExcelError as exc:
parser.exit(1, f"ERROR: {exc}\n")

# Save new ontology as turtle
ontology.save(os.path.join(args.output), format="turtle", overwrite=True)
write_catalog(catalog)
dirname = os.path.dirname(args.output)
if (not args.update) or (not os.path.exists(dirname + "/catalog-v001.xml")):
write_catalog(catalog, directory=dirname)


if __name__ == "__main__":
Expand Down