From c994b21ec7c75c2844e6b693e561c5b6c9ba76b1 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Tue, 11 Apr 2023 07:43:42 +0200 Subject: [PATCH 01/10] Updated get_by_label() so that it now accepts label, name and full iri. --- ontopy/ontology.py | 70 ++++++++++++++++++++++++-------------- tests/test_get_by_label.py | 6 ++++ 2 files changed, 50 insertions(+), 26 deletions(-) create mode 100644 tests/test_get_by_label.py diff --git a/ontopy/ontology.py b/ontopy/ontology.py index a7f35262d..7ce2e0410 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -283,15 +283,24 @@ def get_by_label( The current implementation also supports "*" as a wildcard matching any number of characters. This may change in the future. """ - # pylint: disable=too-many-arguments,too-many-branches + # pylint: disable=too-many-arguments,too-many-branches,invalid-name if not isinstance(label, str): raise TypeError( f"Invalid label definition, must be a string: {label!r}" ) + + # Comment to reviewer: + # Do we really want to disallow space in label? + # I think we wanted ontopy to work with any ontology, also ontologies + # having space in their labels. + # Rather than enforcing EMMO conventions here, I think the user should + # run emmocheck on the ontology - it will alert about labels with + # spaces if " " in label: raise ValueError( f"Invalid label definition, {label!r} contains spaces." ) + if self._label_annotations is None: for iri in DEFAULT_LABEL_ANNOTATIONS: try: @@ -299,23 +308,20 @@ def get_by_label( except ValueError: pass + # Comment to reviewer: + # Since providing the prefix as argument is more explicit, I think + # that should take precedence. Changed it below... splitlabel = label.split(":", 1) - if len(splitlabel) > 2: - raise ValueError( - f"Invalid label definition, {label!r}" - " contains more than one ':' ." - "The string before ':' indicates the prefix. " - "The string after ':' indicates the label." - ) - if len(splitlabel) == 2: + if len(splitlabel) == 2 and not splitlabel[1].startswith("//"): label = splitlabel[1] if prefix and prefix != splitlabel[0]: warnings.warn( f"Prefix given both as argument ({prefix}) " f"and in label ({splitlabel[0]}). " - "Prefix given in label takes presendence " + "Prefix given in argument takes presendence " ) - prefix = splitlabel[0] + if not prefix: + prefix = splitlabel[0] if prefix: entitylist = self.get_by_label_all( @@ -327,31 +333,43 @@ def get_by_label( return entitylist[0] raise NoSuchLabelError( - f"No label annotations matches {label!r} with prefix " + f"No label annotations matches {label!r} with prefix " f"{prefix!r}" ) - # if label in self._namespaces: - # return self._namespaces[label] - if label_annotations is None: - annotations = (a.name for a in self.label_annotations) - else: - annotations = ( - a.name if hasattr(a, "storid") else a for a in label_annotations - ) - for key in annotations: - entity = self.search_one(**{key: label}) - if entity: - return entity + # Label is a full IRI + entity = self.world[label] + if entity: + return entity + # First entity with matching label annotation + annotation_ids = ( + (self._abbreviate(a, False) for a in label_annotations) + if label_annotations + else (a.storid for a in self.label_annotations) + ) + for annotation_id in annotation_ids: + for s, _, _, _ in self._get_data_triples_spod_spod( + None, annotation_id, label, None + ): + return self.world[self._unabbreviate(s)] + + # Special labels if self._special_labels and label in self._special_labels: return self._special_labels[label] + # Check if label is a name under base_iri entity = self.world[self.base_iri + label] if entity: return entity - raise NoSuchLabelError(f"No label annotations matches {label!r}") + # Check if label is a name in any namespace + for namespace in self._namespaces.keys(): + entity = self.world[namespace + label] + if entity: + return entity + + raise NoSuchLabelError(f"No label annotations matches '{label}'") def get_by_label_all(self, label, label_annotations=None, prefix=None): """Like get_by_label(), but returns a list with all matching labels. @@ -1684,7 +1702,7 @@ def _get_unabbreviated_triples( _unabbreviate(self, p, blank=blank), _unabbreviate(self, o, blank=blank), ) - for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=""): + for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=None): yield ( _unabbreviate(self, s, blank=blank), _unabbreviate(self, p, blank=blank), diff --git a/tests/test_get_by_label.py b/tests/test_get_by_label.py new file mode 100644 index 000000000..38e922d85 --- /dev/null +++ b/tests/test_get_by_label.py @@ -0,0 +1,6 @@ +from ontopy import get_ontology + + +emmo = get_ontology().load() +assert emmo[emmo.Atom.name] == emmo.Atom +assert emmo[emmo.Atom.iri] == emmo.Atom From 2b6e835b204129148ca5bc3bcdbcd77b9ea9d2f7 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 12 Apr 2023 14:41:31 +0200 Subject: [PATCH 02/10] Made search for labels including sub-ontologies --- ontopy/ontology.py | 20 +++++++++++++------- tests/test_get_by_label.py | 7 +++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 7ce2e0410..c62bcc5eb 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -187,19 +187,19 @@ def __init__(self, *args, **kwargs): ) def __dir__(self): - set_dir = set(super().__dir__()) + dirset = set(super().__dir__()) lst = list(self.get_entities(imported=self._dir_imported)) if self._dir_preflabel: - set_dir.update( + dirset.update( _.prefLabel.first() for _ in lst if hasattr(_, "prefLabel") ) if self._dir_label: - set_dir.update(_.label.first() for _ in lst if hasattr(_, "label")) + dirset.update(_.label.first() for _ in lst if hasattr(_, "label")) if self._dir_name: - set_dir.update(_.name for _ in lst if hasattr(_, "name")) + dirset.update(_.name for _ in lst if hasattr(_, "name")) - set_dir.difference_update({None}) # get rid of possible None - return sorted(set_dir) + dirset.difference_update({None}) # get rid of possible None + return sorted(dirset) def __getitem__(self, name): item = super().__getitem__(name) @@ -349,7 +349,13 @@ def get_by_label( else (a.storid for a in self.label_annotations) ) for annotation_id in annotation_ids: - for s, _, _, _ in self._get_data_triples_spod_spod( + # + # Question to reviewer: + # Should we make it an option (turned off by default) to search + # only the current ontology? + # I have sometimes been wishing for that. It is easily implemented, + # just remove ".world" in the line below. + for s, _, _, _ in self.world._get_data_triples_spod_spod( None, annotation_id, label, None ): return self.world[self._unabbreviate(s)] diff --git a/tests/test_get_by_label.py b/tests/test_get_by_label.py index 38e922d85..abdef8784 100644 --- a/tests/test_get_by_label.py +++ b/tests/test_get_by_label.py @@ -1,6 +1,13 @@ from ontopy import get_ontology +# Loading emmo-inferred where everything is sqashed into one ontology emmo = get_ontology().load() assert emmo[emmo.Atom.name] == emmo.Atom assert emmo[emmo.Atom.iri] == emmo.Atom + +# Load an ontology with imported sub-ontologies +onto = get_ontology( + "https://raw.githubusercontent.com/BIG-MAP/BattINFO/master/battinfo.ttl" +).load() +assert onto.Electrolyte.prefLabel.first() == "Electrolyte" From 0f900787dd34a55dad9a4963cc67e0d9f665b74e Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 12 Apr 2023 14:54:13 +0200 Subject: [PATCH 03/10] Documented how inconsisent prefixes are handled --- ontopy/ontology.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index c62bcc5eb..a1ea7e199 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -276,12 +276,17 @@ def get_by_label( If several entities have the same label, only the one which is found first is returned.Use get_by_label_all() to get all matches. + Note, if different prefixes are provided in the label and via + the `prefix` argument a warning will be issued and the + `prefix` argument will take precedence. + A NoSuchLabelError is raised if `label` cannot be found. Note ---- The current implementation also supports "*" as a wildcard matching any number of characters. This may change in the future. + """ # pylint: disable=too-many-arguments,too-many-branches,invalid-name if not isinstance(label, str): @@ -308,9 +313,6 @@ def get_by_label( except ValueError: pass - # Comment to reviewer: - # Since providing the prefix as argument is more explicit, I think - # that should take precedence. Changed it below... splitlabel = label.split(":", 1) if len(splitlabel) == 2 and not splitlabel[1].startswith("//"): label = splitlabel[1] From ce0a7f8f06bab96d16a232380b58b83994ef19fc Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 12 Apr 2023 15:49:28 +0200 Subject: [PATCH 04/10] Added tests for dir configurations --- tests/test_dir.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/test_dir.py diff --git a/tests/test_dir.py b/tests/test_dir.py new file mode 100644 index 000000000..4e5f55c6d --- /dev/null +++ b/tests/test_dir.py @@ -0,0 +1,21 @@ +from pathlib import Path + +from ontopy import get_ontology + + +thisdir = Path(__file__).resolve().parent + +onto = get_ontology( + thisdir / "test_excelparser/imported_onto/ontology.ttl" +).load() +assert "TestClass2" in dir(onto) + +onto.dir_imported = False +assert "TestClass2" not in dir(onto) +assert "testclass" not in dir(onto) + +onto.dir_imported = True +onto.dir_name = True +assert "TestClass2" in dir(onto) +assert "testclass" in dir(onto) +assert "testclass2" in dir(onto) From cbfd12a58444dd9466607953283b6961e03c4794 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 12 Apr 2023 16:04:45 +0200 Subject: [PATCH 05/10] Correct test to make it independent of being run interactively or not --- tests/test_dir.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_dir.py b/tests/test_dir.py index 4e5f55c6d..bcdfd244c 100644 --- a/tests/test_dir.py +++ b/tests/test_dir.py @@ -8,13 +8,16 @@ onto = get_ontology( thisdir / "test_excelparser/imported_onto/ontology.ttl" ).load() -assert "TestClass2" in dir(onto) - onto.dir_imported = False +onto.dir_label = False +onto.dir_name = False assert "TestClass2" not in dir(onto) -assert "testclass" not in dir(onto) onto.dir_imported = True +assert "TestClass2" in dir(onto) +assert "testclass" not in dir(onto) +assert "testclass2" not in dir(onto) + onto.dir_name = True assert "TestClass2" in dir(onto) assert "testclass" in dir(onto) From 572b5fbf30e7a6c018413179f7ac37749f579800 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 12 Apr 2023 17:02:21 +0200 Subject: [PATCH 06/10] Fixed test_dir --- ontopy/ontology.py | 1 - tests/test_dir.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index a1ea7e199..75ddc5972 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -197,7 +197,6 @@ def __dir__(self): dirset.update(_.label.first() for _ in lst if hasattr(_, "label")) if self._dir_name: dirset.update(_.name for _ in lst if hasattr(_, "name")) - dirset.difference_update({None}) # get rid of possible None return sorted(dirset) diff --git a/tests/test_dir.py b/tests/test_dir.py index bcdfd244c..867fa7758 100644 --- a/tests/test_dir.py +++ b/tests/test_dir.py @@ -9,11 +9,15 @@ thisdir / "test_excelparser/imported_onto/ontology.ttl" ).load() onto.dir_imported = False +onto.dir_preflabel = False onto.dir_label = False onto.dir_name = False assert "TestClass2" not in dir(onto) onto.dir_imported = True +onto.dir_preflabel = True +assert onto._dir_imported +assert onto.TestClass2 assert "TestClass2" in dir(onto) assert "testclass" not in dir(onto) assert "testclass2" not in dir(onto) From 07e09d8db12dedec8311a8448b252a1bda36ff2e Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 14 Apr 2023 00:00:02 +0200 Subject: [PATCH 07/10] Fixed issue #583 --- ontopy/ontology.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 75ddc5972..82b62c5b0 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -280,12 +280,6 @@ def get_by_label( `prefix` argument will take precedence. A NoSuchLabelError is raised if `label` cannot be found. - - Note - ---- - The current implementation also supports "*" as a wildcard - matching any number of characters. This may change in the future. - """ # pylint: disable=too-many-arguments,too-many-branches,invalid-name if not isinstance(label, str): @@ -293,18 +287,6 @@ def get_by_label( f"Invalid label definition, must be a string: {label!r}" ) - # Comment to reviewer: - # Do we really want to disallow space in label? - # I think we wanted ontopy to work with any ontology, also ontologies - # having space in their labels. - # Rather than enforcing EMMO conventions here, I think the user should - # run emmocheck on the ontology - it will alert about labels with - # spaces - if " " in label: - raise ValueError( - f"Invalid label definition, {label!r} contains spaces." - ) - if self._label_annotations is None: for iri in DEFAULT_LABEL_ANNOTATIONS: try: @@ -382,6 +364,11 @@ def get_by_label_all(self, label, label_annotations=None, prefix=None): """Like get_by_label(), but returns a list with all matching labels. Returns an empty list if no matches could be found. + + Note + ---- + The current implementation also supports "*" as a wildcard + matching any number of characters. This may change in the future. """ if not isinstance(label, str): raise TypeError( From 4eb77efa8df2ed73cf99ebb42e3021d2042d4615 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 14 Apr 2023 20:52:10 +0200 Subject: [PATCH 08/10] Added options `imported` and `colon_in_name` to get_by_label() --- ontopy/excelparser.py | 8 +++++- ontopy/ontology.py | 65 +++++++++++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/ontopy/excelparser.py b/ontopy/excelparser.py index 71775f48f..13bdcbd76 100755 --- a/ontopy/excelparser.py +++ b/ontopy/excelparser.py @@ -21,6 +21,7 @@ from ontopy import get_ontology from ontopy.utils import EMMOntoPyException, NoSuchLabelError from ontopy.utils import ReadCatalogError, read_catalog +from ontopy.ontology import LabelDefinitionError from ontopy.manchester import evaluate import owlready2 # pylint: disable=C0411 @@ -276,7 +277,12 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran if not parents: parents = [owlready2.Thing] - concept = onto.new_entity(name, parents) + try: + concept = onto.new_entity(name, parents) + except LabelDefinitionError: + concepts_with_errors["wrongly_defined"].append(name) + continue + added_rows.add(index) # Add elucidation try: diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 82b62c5b0..1361bec92 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -186,6 +186,15 @@ def __init__(self, *args, **kwargs): doc="Whether to include imported ontologies in dir() listing.", ) + # Other settings + _colon_in_name = False + colon_in_name = property( + fget=lambda self: self._colon_in_name, + fset=lambda self, v: setattr(self, "_colon_in_name", bool(v)), + doc="Whether to accept colon in name-part of IRI. " + "If true, the name cannot be prefixed.", + ) + def __dir__(self): dirset = set(super().__dir__()) lst = list(self.get_entities(imported=self._dir_imported)) @@ -256,7 +265,12 @@ def get_unabbreviated_triples( ) def get_by_label( - self, label: str, label_annotations: str = None, prefix: str = None + self, + label: str, + label_annotations: str = None, + prefix: str = None, + imported: bool = True, + colon_in_name: bool = None, ): """Returns entity with label annotation `label`. @@ -271,6 +285,10 @@ def get_by_label( the base iri of an ontology (with trailing slash (/) or hash (#) stripped off). The search for a matching label will be limited to this namespace. + imported: Whether to also look for `label` in imported ontologies. + colon_in_name: Whether to accept colon (:) in name-part of IRI. + Defaults to the `colon_in_name` property of `self`. + A true value cannot be combined with `prefix`. If several entities have the same label, only the one which is found first is returned.Use get_by_label_all() to get all matches. @@ -294,17 +312,25 @@ def get_by_label( except ValueError: pass - splitlabel = label.split(":", 1) - if len(splitlabel) == 2 and not splitlabel[1].startswith("//"): - label = splitlabel[1] - if prefix and prefix != splitlabel[0]: - warnings.warn( - f"Prefix given both as argument ({prefix}) " - f"and in label ({splitlabel[0]}). " - "Prefix given in argument takes presendence " + if colon_in_name is None: + colon_in_name = self._colon_in_name + if colon_in_name: + if prefix: + raise ValueError( + "`prefix` cannot be combined with `colon_in_name`" ) - if not prefix: - prefix = splitlabel[0] + else: + splitlabel = label.split(":", 1) + if len(splitlabel) == 2 and not splitlabel[1].startswith("//"): + label = splitlabel[1] + if prefix and prefix != splitlabel[0]: + warnings.warn( + f"Prefix given both as argument ({prefix}) " + f"and in label ({splitlabel[0]}). " + "Prefix given in argument takes presendence " + ) + if not prefix: + prefix = splitlabel[0] if prefix: entitylist = self.get_by_label_all( @@ -331,16 +357,13 @@ def get_by_label( if label_annotations else (a.storid for a in self.label_annotations) ) + get_triples = ( + self.world._get_data_triples_spod_spod + if imported + else self._get_data_triples_spod_spod + ) for annotation_id in annotation_ids: - # - # Question to reviewer: - # Should we make it an option (turned off by default) to search - # only the current ontology? - # I have sometimes been wishing for that. It is easily implemented, - # just remove ".world" in the line below. - for s, _, _, _ in self.world._get_data_triples_spod_spod( - None, annotation_id, label, None - ): + for s, _, _, _ in get_triples(None, annotation_id, label, None): return self.world[self._unabbreviate(s)] # Special labels @@ -1594,7 +1617,7 @@ def new_entity( Throws exception if name consists of more than one word. """ - if len(name.split(" ")) > 1: + if " " in name: raise LabelDefinitionError( f"Error in label name definition '{name}': " f"Label consists of more than one word." From 03f90a5369aadc9c56c99597f8f4771ce813cb23 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 14 Apr 2023 23:39:25 +0200 Subject: [PATCH 09/10] Satisfy pylint with longer variable names --- ontopy/ontology.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index 1361bec92..cb369d297 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -353,9 +353,9 @@ def get_by_label( # First entity with matching label annotation annotation_ids = ( - (self._abbreviate(a, False) for a in label_annotations) + (self._abbreviate(ann, False) for ann in label_annotations) if label_annotations - else (a.storid for a in self.label_annotations) + else (ann.storid for ann in self.label_annotations) ) get_triples = ( self.world._get_data_triples_spod_spod From ae5804f19fe50358d42f4ebb6b5d3ae7e7b2eb0c Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 15 Apr 2023 17:56:49 +0200 Subject: [PATCH 10/10] Added test for `colon_in_label` argument of get_by_label() --- ontopy/ontology.py | 24 ++++++++++++------------ tests/test_get_by_label.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/ontopy/ontology.py b/ontopy/ontology.py index cb369d297..f6c499024 100644 --- a/ontopy/ontology.py +++ b/ontopy/ontology.py @@ -187,10 +187,10 @@ def __init__(self, *args, **kwargs): ) # Other settings - _colon_in_name = False - colon_in_name = property( - fget=lambda self: self._colon_in_name, - fset=lambda self, v: setattr(self, "_colon_in_name", bool(v)), + _colon_in_label = False + colon_in_label = property( + fget=lambda self: self._colon_in_label, + fset=lambda self, v: setattr(self, "_colon_in_label", bool(v)), doc="Whether to accept colon in name-part of IRI. " "If true, the name cannot be prefixed.", ) @@ -270,7 +270,7 @@ def get_by_label( label_annotations: str = None, prefix: str = None, imported: bool = True, - colon_in_name: bool = None, + colon_in_label: bool = None, ): """Returns entity with label annotation `label`. @@ -286,9 +286,9 @@ def get_by_label( (#) stripped off). The search for a matching label will be limited to this namespace. imported: Whether to also look for `label` in imported ontologies. - colon_in_name: Whether to accept colon (:) in name-part of IRI. - Defaults to the `colon_in_name` property of `self`. - A true value cannot be combined with `prefix`. + colon_in_label: Whether to accept colon (:) in a label or name-part + of IRI. Defaults to the `colon_in_label` property of `self`. + Setting this true cannot be combined with `prefix`. If several entities have the same label, only the one which is found first is returned.Use get_by_label_all() to get all matches. @@ -312,12 +312,12 @@ def get_by_label( except ValueError: pass - if colon_in_name is None: - colon_in_name = self._colon_in_name - if colon_in_name: + if colon_in_label is None: + colon_in_label = self._colon_in_label + if colon_in_label: if prefix: raise ValueError( - "`prefix` cannot be combined with `colon_in_name`" + "`prefix` cannot be combined with `colon_in_label`" ) else: splitlabel = label.split(":", 1) diff --git a/tests/test_get_by_label.py b/tests/test_get_by_label.py index abdef8784..5bbf1375c 100644 --- a/tests/test_get_by_label.py +++ b/tests/test_get_by_label.py @@ -1,4 +1,7 @@ +import pytest + from ontopy import get_ontology +from ontopy.ontology import NoSuchLabelError # Loading emmo-inferred where everything is sqashed into one ontology @@ -11,3 +14,11 @@ "https://raw.githubusercontent.com/BIG-MAP/BattINFO/master/battinfo.ttl" ).load() assert onto.Electrolyte.prefLabel.first() == "Electrolyte" + + +# Check colon_in_name argument +onto.Atom.altLabel.append("Element:X") +with pytest.raises(NoSuchLabelError): + onto.get_by_label("Element:X") + +assert onto.get_by_label("Element:X", colon_in_label=True) == onto.Atom