diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index a011e97..d02b528 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -2,9 +2,9 @@ name: Run Tests on: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] jobs: build: diff --git a/README.rst b/README.rst index 4ba8be3..9eaca6b 100644 --- a/README.rst +++ b/README.rst @@ -136,7 +136,7 @@ Given a type system with a type :code:`cassis.Token` that has an :code:`id` and ] for token in tokens: - cas.add_annotation(token) + cas.add(token) Selecting annotations ~~~~~~~~~~~~~~~~~~~~~ @@ -158,16 +158,25 @@ Selecting annotations # Annotation values can be accessed as properties print('Token: begin={0}, end={1}, id={2}, pos={3}'.format(token.begin, token.end, token.id, token.pos)) -Selecting nested features +Getting and setting (nested) features -~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you have nested feature structures, e.g. a feature structure with feature :code:`a` that has a +If you want to access a feature but only have its name as a string, or if you have nested feature structures, +e.g. a feature structure with feature :code:`a` that has a feature :code:`b` that has a feature :code:`c`, some of which can be :code:`None`, then you can use the following: +.. code:: python + + fs.get("var_name") # Or + fs["var_name"] + +Or in the nested case, + .. code:: python fs.get("a.b.c") + fs["a.b.c"] If :code:`a` or :code:`b` or :code:`c` are :code:`None`, then this returns instead of @@ -183,6 +192,29 @@ Another example would be a StringList containing :code:`["Foo", "Bar", "Baz"]`: assert lst.get("tail.tail.tail.head") == None assert lst.get("tail.tail.tail.tail.head") == None +The same goes for setting: + +..
code:: python + + # Functional + lst.set("head", "new_foo") + lst.set("tail.head", "new_bar") + lst.set("tail.tail.head", "new_baz") + + assert lst.get("head") == "new_foo" + assert lst.get("tail.head") == "new_bar" + assert lst.get("tail.tail.head") == "new_baz" + + # Bracket access + lst["head"] = "newer_foo" + lst["tail.head"] = "newer_bar" + lst["tail.tail.head"] = "newer_baz" + + assert lst["head"] == "newer_foo" + assert lst["tail.head"] == "newer_bar" + assert lst["tail.tail.head"] == "newer_baz" + + Creating types and adding features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -193,10 +225,10 @@ Creating types and adding features typesystem = TypeSystem() parent_type = typesystem.create_type(name='example.ParentType') - typesystem.add_feature(type_=parent_type, name='parentFeature', rangeTypeName='String') + typesystem.create_feature(type_=parent_type, name='parentFeature', rangeTypeName='String') child_type = typesystem.create_type(name='example.ChildType', supertypeName=parent_type.name) - typesystem.add_feature(type_=child_type, name='childFeature', rangeTypeName='Integer') + typesystem.create_feature(type_=child_type, name='childFeature', rangeTypeName='Integer') annotation = child_type(parentFeature='parent', childFeature='child') @@ -242,7 +274,7 @@ as a :code:`Cas` . cas = Cas() cas.sofa_string = "I like cheese ." - cas.add_annotations([ + cas.add_all([ Token(begin=0, end=1), Token(begin=2, end=6), Token(begin=7, end=13), @@ -255,7 +287,7 @@ as a :code:`Cas` . view = cas.create_view('testView') view.sofa_string = "I like blackcurrant ." 
- view.add_annotations([ + view.add_all([ Token(begin=0, end=1), Token(begin=2, end=6), Token(begin=7, end=19), @@ -318,8 +350,8 @@ available as a member variable :code:`self_` or :code:`type_` on the respective typesystem = TypeSystem() ExampleType = typesystem.create_type(name='example.Type') - typesystem.add_feature(type_=ExampleType, name='self', rangeTypeName='String') - typesystem.add_feature(type_=ExampleType, name='type', rangeTypeName='String') + typesystem.create_feature(type_=ExampleType, name='self', rangeTypeName='String') + typesystem.create_feature(type_=ExampleType, name='type', rangeTypeName='String') annotation = ExampleType(self_="Test string1", type_="Test string2") diff --git a/cassis/cas.py b/cassis/cas.py index 999771d..cccc427 100644 --- a/cassis/cas.py +++ b/cassis/cas.py @@ -270,7 +270,7 @@ def views(self) -> List[View]: """ return list(self._views.values()) - def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = True): + def add(self, annotation: FeatureStructure, keep_id: Optional[bool] = True): """Adds an annotation to this Cas. Args: @@ -294,7 +294,18 @@ def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = self._current_view.add_annotation_to_index(annotation) - def add_annotations(self, annotations: Iterable[FeatureStructure]): + @deprecation.deprecated(details="Use add()") + def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = True): + """Adds an annotation to this Cas. + + Args: + annotation: The annotation to add. + keep_id: Keep the XMI id of `annotation` if true, else generate a new one. + + """ + self.add(annotation, keep_id) + + def add_all(self, annotations: Iterable[FeatureStructure]): """Adds several annotations at once to this CAS. 
Args: @@ -302,9 +313,19 @@ def add_annotations(self, annotations: Iterable[FeatureStructure]): """ for annotation in annotations: - self.add_annotation(annotation) + self.add(annotation) - def remove_annotation(self, annotation: FeatureStructure): + @deprecation.deprecated(details="Use add_all()") + def add_annotations(self, annotations: Iterable[FeatureStructure]): + """Adds several annotations at once to this CAS. + + Args: + annotations: An iterable of annotations to add. + + """ + self.add_all(annotations) + + def remove(self, annotation: FeatureStructure): """Removes an annotation from an index. This throws if the annotation was not present. @@ -313,6 +334,16 @@ def remove_annotation(self, annotation: FeatureStructure): """ self._current_view.remove_annotation_from_index(annotation) + @deprecation.deprecated(details="Use remove()") + def remove_annotation(self, annotation: FeatureStructure): + """Removes an annotation from an index. This throws if the + annotation was not present. + + Args: + annotation: The annotation to remove. + """ + self.remove(annotation) + @deprecation.deprecated(details="Use annotation.get_covered_text()") def get_covered_text(self, annotation: FeatureStructure) -> str: """Gets the text that is covered by `annotation`. diff --git a/cassis/typesystem.py b/cassis/typesystem.py index fcac6f6..dc22017 100644 --- a/cassis/typesystem.py +++ b/cassis/typesystem.py @@ -7,6 +7,7 @@ from typing import IO, Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Union import attr +import deprecation from lxml import etree from more_itertools import unique_everseen from toposort import toposort_flatten @@ -205,6 +206,13 @@ def get_covered_text(self) -> str: raise NotImplementedError() def get(self, path: str) -> Optional[Any]: + """ Recursively gets an attribute, e.g. fs.get("a.b.c") would return attribute `c` of `b` of `a`. + + If you have nested feature structures, e.g. 
a feature structure with feature `a` that has a feature `b` that + has a feature `c`, some of which can be `None`, then you can use the following: + + fs.get("a.b.c") + """ cur = self for part in path.split("."): cur = getattr(cur, part, None) @@ -213,6 +221,31 @@ def get(self, path: str) -> Optional[Any]: return cur + def set(self, path: str, value: Any): + """ Recursively sets an attribute, e.g. fs.set("a.b.c", 42) would set attribute `c` of `b` of `a` to `42`. """ + + if "." not in path: + setattr(self, path, value) + return + + idx = path.rindex(".") + + value_name = path[idx + 1 :] + path = path[:idx] + + target = self.get(path) + + if target is None: + raise AttributeError(f"Attribute with name [{value_name}] not found on: {target}") + + setattr(target, value_name, value) + + def __getitem__(self, key): + return self.get(key) + + def __setitem__(self, key, value): + return self.set(key, value) + def __hash__(self): return self.xmiID @@ -432,7 +465,7 @@ def __init__(self, add_document_annotation_type: bool = True): # Array t = self.create_type(name="uima.cas.ArrayBase", supertypeName="uima.cas.TOP") - self.add_feature(t, name="elements", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True) + self.create_feature(t, name="elements", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True) self.create_type(name="uima.cas.FSArray", supertypeName="uima.cas.ArrayBase") self.create_type(name="uima.cas.BooleanArray", supertypeName="uima.cas.ArrayBase") @@ -449,52 +482,47 @@ def __init__(self, add_document_annotation_type: bool = True): self.create_type(name="uima.cas.FSList", supertypeName="uima.cas.ListBase") self.create_type(name="uima.cas.EmptyFSList", supertypeName="uima.cas.FSList") t = self.create_type(name="uima.cas.NonEmptyFSList", supertypeName="uima.cas.FSList") - self.add_feature(t, name="head", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True) - self.add_feature(t, name="tail", rangeTypeName="uima.cas.FSList", 
multipleReferencesAllowed=True) + self.create_feature(t, name="head", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True) + self.create_feature(t, name="tail", rangeTypeName="uima.cas.FSList", multipleReferencesAllowed=True) # FloatList self.create_type(name="uima.cas.FloatList", supertypeName="uima.cas.ListBase") self.create_type(name="uima.cas.EmptyFloatList", supertypeName="uima.cas.FloatList") t = self.create_type(name="uima.cas.NonEmptyFloatList", supertypeName="uima.cas.FloatList") - self.add_feature(t, name="head", rangeTypeName="uima.cas.Float") - self.add_feature(t, name="tail", rangeTypeName="uima.cas.FloatList", multipleReferencesAllowed=True) + self.create_feature(t, name="head", rangeTypeName="uima.cas.Float") + self.create_feature(t, name="tail", rangeTypeName="uima.cas.FloatList", multipleReferencesAllowed=True) # IntegerList self.create_type(name="uima.cas.IntegerList", supertypeName="uima.cas.ListBase") self.create_type(name="uima.cas.EmptyIntegerList", supertypeName="uima.cas.IntegerList") t = self.create_type(name="uima.cas.NonEmptyIntegerList", supertypeName="uima.cas.IntegerList") - self.add_feature(t, name="head", rangeTypeName="uima.cas.Integer") - self.add_feature(t, name="tail", rangeTypeName="uima.cas.IntegerList", multipleReferencesAllowed=True) + self.create_feature(t, name="head", rangeTypeName="uima.cas.Integer") + self.create_feature(t, name="tail", rangeTypeName="uima.cas.IntegerList", multipleReferencesAllowed=True) # StringList self.create_type(name="uima.cas.StringList", supertypeName="uima.cas.ListBase") self.create_type(name="uima.cas.EmptyStringList", supertypeName="uima.cas.StringList") t = self.create_type(name="uima.cas.NonEmptyStringList", supertypeName="uima.cas.StringList") - self.add_feature(t, name="head", rangeTypeName="uima.cas.String") - self.add_feature(t, name="tail", rangeTypeName="uima.cas.StringList", multipleReferencesAllowed=True) + self.create_feature(t, name="head", rangeTypeName="uima.cas.String") 
+ self.create_feature(t, name="tail", rangeTypeName="uima.cas.StringList", multipleReferencesAllowed=True) # Sofa t = self.create_type(name="uima.cas.Sofa", supertypeName="uima.cas.TOP") - self.add_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer") - self.add_feature(t, name="sofaID", rangeTypeName="uima.cas.String") - self.add_feature(t, name="mimeType", rangeTypeName="uima.cas.String") - self.add_feature( - t, - name="sofaArray", - rangeTypeName="uima.cas.TOP", - multipleReferencesAllowed=True, - ) - self.add_feature(t, name="sofaString", rangeTypeName="uima.cas.String") - self.add_feature(t, name="sofaURI", rangeTypeName="uima.cas.String") + self.create_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer") + self.create_feature(t, name="sofaID", rangeTypeName="uima.cas.String") + self.create_feature(t, name="mimeType", rangeTypeName="uima.cas.String") + self.create_feature(t, name="sofaArray", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True) + self.create_feature(t, name="sofaString", rangeTypeName="uima.cas.String") + self.create_feature(t, name="sofaURI", rangeTypeName="uima.cas.String") # AnnotationBase t = self.create_type(name="uima.cas.AnnotationBase", supertypeName="uima.cas.TOP") - self.add_feature(t, name="sofa", rangeTypeName="uima.cas.Sofa") + self.create_feature(t, name="sofa", rangeTypeName="uima.cas.Sofa") # Annotation t = self.create_type(name="uima.tcas.Annotation", supertypeName="uima.cas.AnnotationBase") - self.add_feature(t, name="begin", rangeTypeName="uima.cas.Integer") - self.add_feature(t, name="end", rangeTypeName="uima.cas.Integer") + self.create_feature(t, name="begin", rangeTypeName="uima.cas.Integer") + self.create_feature(t, name="end", rangeTypeName="uima.cas.Integer") if add_document_annotation_type: self._add_document_annotation_type() @@ -646,7 +674,7 @@ def subsumes(self, parent_name: str, child_name: str) -> bool: return False - def add_feature( + def create_feature( self, type_: Type, name: str, 
@@ -692,6 +720,32 @@ def add_feature( type_.add_feature(feature) + @deprecation.deprecated(details="Use create_feature") + def add_feature( + self, + type_: Type, + name: str, + rangeTypeName: str, + elementType: str = None, + description: str = None, + multipleReferencesAllowed: bool = None, + ): + """Adds a feature to the given type. + Args: + type_: The type to which the feature will be added + name: The name of the new feature + rangeTypeName: The feature's rangeTypeName specifies the type of value that the feature can take. + elementType: The elementType of a feature is optional, and applies only when the rangeTypeName + is uima.cas.FSArray or uima.cas.FSList. The elementType specifies what type of value can be + assigned as an element of the array or list. + description: The description of the new feature + multipleReferencesAllowed: Setting this to true indicates that the array or list may be shared, + so changes to it may affect other objects in the CAS. + Raises: + ValueError: If a feature named `name` already exists in `type_` with a different definition. + """ + self.create_feature(type_, name, rangeTypeName, elementType, description, multipleReferencesAllowed) + def to_xml(self, path: Union[str, Path, None] = None) -> Optional[str]: """Creates a XMI representation of this type system. @@ -751,7 +805,7 @@ def _defines_predefined_type(self, type_name): def _add_document_annotation_type(self): t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName="uima.tcas.Annotation") - self.add_feature(t, name="language", rangeTypeName="uima.cas.String") + self.create_feature(t, name="language", rangeTypeName="uima.cas.String") # Deserializing @@ -803,12 +857,6 @@ def deserialize(self, source: Union[IO, str]) -> TypeSystem: description = self._get_elem_as_str(elem.find("{*}description")) supertypeName = self._get_elem_as_str(elem.find("{*}supertypeName")) - if "." not in type_name: - type_name = "uima.noNamespace." + type_name - - if "."
not in supertypeName: - supertypeName = "uima.noNamespace." + supertypeName - types[type_name] = Type(name=type_name, supertypeName=supertypeName, description=description) type_dependencies[type_name].add(supertypeName) @@ -886,7 +934,7 @@ def deserialize(self, source: Union[IO, str]) -> TypeSystem: # between type references in inheritance and type references in range or element type. for t in created_types: for f in features[t.name]: - ts.add_feature( + ts.create_feature( t, name=f.name, rangeTypeName=f.rangeTypeName, @@ -944,20 +992,13 @@ def _serialize_type(self, xf: IO, type_: Type): typeDescription = etree.Element("typeDescription") name = etree.SubElement(typeDescription, "name") - type_name = type_.name - if type_name.startswith("uima.noNamespace."): - type_name = type_name.replace("uima.noNamespace.", "") - - name.text = type_name + name.text = type_.name description = etree.SubElement(typeDescription, "description") description.text = type_.description supertype_name_node = etree.SubElement(typeDescription, "supertypeName") - supertype_name = type_.supertypeName - if supertype_name.startswith("uima.noNamespace."): - supertype_name = supertype_name.replace("uima.noNamespace.", "") - supertype_name_node.text = supertype_name + supertype_name_node.text = type_.supertypeName # Only create the `feature` element if there is at least one feature feature_list = list(type_.features) diff --git a/cassis/xmi.py b/cassis/xmi.py index 8656c9d..2e2630a 100644 --- a/cassis/xmi.py +++ b/cassis/xmi.py @@ -235,7 +235,7 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b fs.begin = sofa._offset_converter.uima_to_cassis(fs.begin) fs.end = sofa._offset_converter.uima_to_cassis(fs.end) - view.add_annotation(fs, keep_id=True) + view.add(fs, keep_id=True) cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1) cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1) @@ -262,9 +262,12 @@ def _parse_view(self, elem) -> ProtoView: def 
_parse_feature_structure(self, typesystem: TypeSystem, elem, children: Dict[str, List[str]]): # Strip the http prefix, replace / with ., remove the ecore part # TODO: Error checking - typename = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip() + type_name: str = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip() - AnnotationType = typesystem.get_type(typename) + if type_name.startswith("uima.noNamespace."): + type_name = type_name[17:] + + AnnotationType = typesystem.get_type(type_name) attributes = dict(elem.attrib) attributes.update(children) @@ -339,6 +342,11 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True): self._serialize_cas_null(root) + # Generate XMI ids for unset ones + for fs in cas._find_all_fs(): + if fs.xmiID is None: + fs.xmiID = cas._get_next_xmi_id() + # Find all fs, even the ones that are not directly added to a sofa for fs in sorted(cas._find_all_fs(), key=lambda a: a.xmiID): self._serialize_feature_structure(cas, root, fs) @@ -363,8 +371,12 @@ def _serialize_cas_null(self, root: etree.Element): def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: FeatureStructure): ts = cas.typesystem + type_name = fs.type + if "." not in type_name: + type_name = f"uima.noNamespace.{type_name}" + # The type name is a Java package, e.g. `org.myproj.Foo`. - parts = fs.type.split(".") + parts = type_name.split(".") # The CAS type namespace is converted to an XML namespace URI by the following rule: # replace all dots with slashes, prepend http:///, and append .ecore. 
diff --git a/tests/fixtures.py b/tests/fixtures.py index e13ed24..3a670f3 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -160,6 +160,20 @@ def cas_with_leniency_xmi(cas_with_leniency_path): return f.read() +# CAS with feature structures whose types have no namespace + + +@pytest.fixture +def cas_has_fs_with_no_namespace_path(): + return os.path.join(FIXTURE_DIR, "xmi", "cas_has_fs_with_no_namespace.xmi") + + +@pytest.fixture +def cas_has_fs_with_no_namespace_xmi(cas_has_fs_with_no_namespace_path): + with open(cas_has_fs_with_no_namespace_path, "r") as f: + return f.read() + + # Small type system @@ -334,7 +348,7 @@ def tokens(small_typesystem_xml): ] for token in tokens: - cas.add_annotation(token) + cas.add(token) return tokens @@ -350,6 +364,6 @@ def sentences(small_typesystem_xml): sentences = [SentenceType(begin=0, end=26, id="0"), SentenceType(begin=27, end=47, id="1")] for sentence in sentences: - cas.add_annotation(sentence) + cas.add(sentence) return sentences diff --git a/tests/test_cas.py b/tests/test_cas.py index 2c1986b..11ad0a2 100644 --- a/tests/test_cas.py +++ b/tests/test_cas.py @@ -112,7 +112,7 @@ def test_sofa_uri_can_be_set_and_read(): def test_select(small_typesystem_xml, tokens, sentences): cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(tokens + sentences) + cas.add_all(tokens + sentences) actual_tokens = list(cas.select("cassis.Token")) actual_sentences = list(cas.select("cassis.Sentence")) @@ -124,7 +124,7 @@ def test_select(small_typesystem_xml, tokens, sentences): def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences): annotations = tokens + sentences cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(annotations) + cas.add_all(annotations) actual_annotations = list(cas.select("uima.tcas.Annotation")) @@ -133,7 +133,7 @@ def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sent def
test_select_covered(small_typesystem_xml, tokens, sentences): cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(tokens + sentences) + cas.add_all(tokens + sentences) first_sentence, second_sentence = sentences tokens_in_first_sentence = tokens[:6] tokens_in_second_sentence = tokens[6:] @@ -153,8 +153,8 @@ def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences): sentence = SentenceType(begin=0, end=10) annotations = [AnnotationType(begin=0, end=5), AnnotationType(begin=0, end=5)] - cas.add_annotation(sentence) - cas.add_annotations(annotations) + cas.add(sentence) + cas.add_all(annotations) actual_annotations = list(cas.select_covered("test.Annotation", sentence)) @@ -172,7 +172,7 @@ def test_select_covered_also_returns_parent_instances(small_typesystem_xml, toke annotations.append(subtoken2) cas = Cas(typesystem=typesystem) - cas.add_annotations(annotations) + cas.add_all(annotations) first_sentence, second_sentence = sentences tokens_in_first_sentence = tokens[:6] @@ -187,7 +187,7 @@ def test_select_covered_also_returns_parent_instances(small_typesystem_xml, toke def test_select_covering(small_typesystem_xml, tokens, sentences): cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(tokens + sentences) + cas.add_all(tokens + sentences) actual_first_sentence, actual_second_sentence = sentences tokens_in_first_sentence = tokens[:6] tokens_in_second_sentence = tokens[6:] @@ -219,7 +219,7 @@ def test_select_covering_also_returns_parent_instances(small_typesystem_xml, tok subsentence2 = SubSentenceType(begin=second_sentence.begin, end=second_sentence.end) annotations.append(subsentence1) annotations.append(subsentence2) - cas.add_annotations(annotations) + cas.add_all(annotations) tokens_in_first_sentence = tokens[:6] tokens_in_second_sentence = tokens[6:] @@ -237,9 +237,9 @@ def test_select_covering_also_returns_parent_instances(small_typesystem_xml, tok def 
test_select_only_returns_annotations_of_current_view(tokens, sentences, small_typesystem_xml): cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(tokens) + cas.add_all(tokens) view = cas.create_view("testView") - view.add_annotations(sentences) + view.add_all(sentences) actual_annotations_in_initial_view = list(cas.get_view("_InitialView").select_all()) actual_annotations_in_test_view = list(cas.get_view("testView").select_all()) @@ -257,47 +257,6 @@ def test_select_returns_feature_structures(cas_with_collections_xmi, typesystem_ assert len(arrs) == 1 -# Get with path selector - - -def test_get_path_semargs(cas_with_references_xmi, webanno_typesystem_xml): - typesystem = load_typesystem(webanno_typesystem_xml) - cas = load_cas_from_xmi(cas_with_references_xmi, typesystem=typesystem) - - result = cas.select("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred") - assert len(result) == 1 - pred = result[0] - first_arg = pred.arguments[0] - - end = first_arg.get("target.end") - - assert end == 5 - - -def test_get_path_stringlist(): - cas = Cas() - - NonEmptyStringList = cas.typesystem.get_type("uima.cas.NonEmptyStringList") - EmptyStringList = cas.typesystem.get_type("uima.cas.EmptyStringList") - - data = ["foo", "bar", "baz"] - lst = NonEmptyStringList() - - cur = lst - for s in data: - cur.head = s - cur.tail = NonEmptyStringList() - cur = cur.tail - - cur.tail = EmptyStringList() - print(lst) - - assert lst.get("head") == "foo" - assert lst.get("tail.head") == "bar" - assert lst.get("tail.tail.head") == "baz" - assert lst.get("tail.tail.tail.head") is None - - # Covered text @@ -359,7 +318,7 @@ def test_add_annotation(small_typesystem_xml): TokenType(begin=25, end=26, id="5", pos="."), ] for token in tokens: - cas.add_annotation(token) + cas.add(token) actual_tokens = list(cas.select(TokenType.name)) assert actual_tokens == tokens @@ -379,7 +338,7 @@ def test_add_annotation_generates_ids(small_typesystem_xml, tokens): 
TokenType(begin=25, end=26, id="5", pos="."), ] for token in tokens: - cas.add_annotation(token) + cas.add(token) actual_tokens = list(cas.select(TokenType.name)) assert all([token.xmiID is not None for token in actual_tokens]) @@ -393,7 +352,7 @@ def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens): random.shuffle(list(annotations)) for token in annotations: - cas.add_annotation(token) + cas.add(token) actual_tokens = list(cas.select("cassis.Token")) @@ -408,7 +367,7 @@ def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml): cas = Cas() with pytest.raises(RuntimeError, match="Typesystem of CAS does not contain type"): - cas.add_annotation(token) + cas.add(token) def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml): @@ -418,7 +377,7 @@ def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml): token = TokenType(begin=0, end=3, id="0", pos="NNP") cas = Cas(lenient=True) - cas.add_annotation(token) + cas.add(token) def test_select_returns_children_fs_instances(cas_with_inheritance_xmi, typesystem_with_inheritance_xml): @@ -439,16 +398,16 @@ def test_select_returns_children_fs_instances(cas_with_inheritance_xmi, typesyst def test_removing_of_existing_fs_works(small_typesystem_xml, tokens, sentences): annotations = tokens + sentences cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(annotations) + cas.add_all(annotations) for token in tokens: - cas.remove_annotation(token) + cas.remove(token) actual_annotations = list(cas.select("uima.tcas.Annotation")) assert set(actual_annotations) == set(sentences) for sentence in sentences: - cas.remove_annotation(sentence) + cas.remove(sentence) actual_annotations = list(cas.select("uima.tcas.Annotation")) assert set(actual_annotations) == set() @@ -459,11 +418,11 @@ def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences): cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) view = 
cas.create_view("testView") - cas.add_annotations(annotations) - view.add_annotations(annotations) + cas.add_all(annotations) + view.add_all(annotations) for annotation in annotations: - cas.remove_annotation(annotation) + cas.remove(annotation) assert set(cas.select("uima.tcas.Annotation")) == set() assert set(view.select("uima.tcas.Annotation")) == set(annotations) @@ -473,14 +432,14 @@ def test_removing_throws_if_fs_not_found(small_typesystem_xml, tokens, sentences cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) with pytest.raises(ValueError): - cas.remove_annotation(tokens[0]) + cas.remove(tokens[0]) def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens, sentences): cas = Cas(typesystem=load_typesystem(small_typesystem_xml)) - cas.add_annotations(tokens) + cas.add_all(tokens) view = cas.create_view("testView") with pytest.raises(ValueError): - view.remove_annotation(tokens[0]) + view.remove(tokens[0]) diff --git a/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml b/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml index 6805a8d..245aa76 100644 --- a/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml +++ b/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml @@ -2,41 +2,18 @@ - ArtifactID + TypeWithNoNameSpace A unique artifact identifier. - uima.cas.TOP + uima.tcas.Annotation - artifactID - A unique identification string for the artifact. This should be the file name for files, - or the unique identifier used in a database if the document source is a database - collection reader. + testFeature + I am the testFeatureDescription uima.cas.String - - ArtifactMetadata - A piece of metadata about the artifact in the form of a key value pair. 
- uima.cas.TOP - - - key - - uima.cas.String - - - value - - uima.cas.String - - - - - ChildOfArtifactID - ArtifactID - - + diff --git a/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi b/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi new file mode 100644 index 0000000..ed9748b --- /dev/null +++ b/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tests/test_typesystem.py b/tests/test_typesystem.py index 5e4ba66..39eaf5d 100644 --- a/tests/test_typesystem.py +++ b/tests/test_typesystem.py @@ -27,7 +27,7 @@ def test_feature_can_be_added(): typesystem = TypeSystem() test_type = typesystem.create_type(name="test.Type") - typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") + typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") actual_type = typesystem.get_type("test.Type") actual_feature = actual_type.get_feature("testFeature") @@ -41,9 +41,9 @@ def test_feature_adding_warns_if_redefined_identically(): test_type = typesystem.create_type(name="test.Type") - typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") + typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") with pytest.warns(UserWarning): - typesystem.add_feature( + typesystem.create_feature( type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature" ) @@ -52,10 +52,10 @@ def test_feature_adding_throws_if_redefined_differently(): typesystem = TypeSystem() test_type = typesystem.create_type(name="test.Type") - typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") + typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") with 
pytest.raises(ValueError): - typesystem.add_feature( + typesystem.create_feature( type_=test_type, name="testFeature", rangeTypeName="Boolean", description="A test feature" ) @@ -75,7 +75,7 @@ def test_type_can_be_created(): def test_type_can_create_instances(): typesystem = TypeSystem() test_type = typesystem.create_type(name="test.Type") - typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") + typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature") annotation = test_type(begin=0, end=42, testFeature="testValue") @@ -88,10 +88,10 @@ def test_type_can_create_instance_with_inherited_fields(): typesystem = TypeSystem() parent_type = typesystem.create_type(name="test.ParentType") - typesystem.add_feature(type_=parent_type, name="parentFeature", rangeTypeName="String") + typesystem.create_feature(type_=parent_type, name="parentFeature", rangeTypeName="String") child_type = typesystem.create_type(name="test.ChildType", supertypeName=parent_type.name) - typesystem.add_feature(type_=child_type, name="childFeature", rangeTypeName="Integer") + typesystem.create_feature(type_=child_type, name="childFeature", rangeTypeName="Integer") annotation = child_type(parentFeature="parent", childFeature="child") @@ -500,7 +500,7 @@ def test_that_merging_compatible_typesystem_works(name, rangeTypeName, elementTy ts = TypeSystem() t = ts.create_type("test.ArraysAndListsWithElementTypes", supertypeName="uima.cas.TOP") - ts.add_feature( + ts.create_feature( type_=t, name=name, rangeTypeName=rangeTypeName, @@ -533,7 +533,7 @@ def test_that_merging_incompatible_typesystem_throws(name, rangeTypeName, elemen ts = TypeSystem() t = ts.create_type("test.ArraysAndListsWithElementTypes", supertypeName="uima.cas.TOP") - ts.add_feature( + ts.create_feature( type_=t, name=name, rangeTypeName=rangeTypeName, @@ -596,8 +596,8 @@ def test_typchecking_fs_array(): MyOtherValue = 
cas.typesystem.create_type(name="test.MyOtherValue", supertypeName="uima.cas.TOP") MyCollection = cas.typesystem.create_type("test.MyCollection", supertypeName="uima.cas.TOP") - cas.typesystem.add_feature(type_=MyValue, name="value", rangeTypeName="uima.cas.String") - cas.typesystem.add_feature( + cas.typesystem.create_feature(type_=MyValue, name="value", rangeTypeName="uima.cas.String") + cas.typesystem.create_feature( type_=MyCollection, name="members", rangeTypeName="uima.cas.FSArray", elementType="test.MyValue" ) @@ -605,7 +605,7 @@ def test_typchecking_fs_array(): collection = MyCollection(members=members) - cas.add_annotation(collection) + cas.add(collection) errors = cas.typecheck() @@ -614,3 +614,80 @@ def test_typchecking_fs_array(): 2, "Member of [uima.cas.FSArray] has unsound type: was [test.MyOtherValue], need [test.MyValue]!" ) assert errors[0] == expected_error + + +# Getting/Setting with path selector + + +def test_get_set_path_semargs(cas_with_references_xmi, webanno_typesystem_xml): + typesystem = load_typesystem(webanno_typesystem_xml) + cas = load_cas_from_xmi(cas_with_references_xmi, typesystem=typesystem) + + result = cas.select("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred") + assert len(result) == 1 + pred = result[0] + first_arg = pred.arguments[0] + + assert first_arg.get("target.end") == 5 + first_arg.set("target.end", 42) + assert first_arg.get("target.end") == 42 + + assert first_arg["target.end"] == 42 + first_arg["target.end"] = 23 + assert first_arg["target.end"] == 23 + + +def test_get_set_path_stringlist(): + cas = Cas() + + NonEmptyStringList = cas.typesystem.get_type("uima.cas.NonEmptyStringList") + EmptyStringList = cas.typesystem.get_type("uima.cas.EmptyStringList") + + data = ["foo", "bar", "baz"] + lst = NonEmptyStringList() + + cur = lst + for s in data: + cur.head = s + cur.tail = NonEmptyStringList() + cur = cur.tail + cur.tail = EmptyStringList() + + assert lst.get("head") == "foo" + assert lst.get("tail.head") 
== "bar" + assert lst.get("tail.tail.head") == "baz" + assert lst.get("tail.tail.tail.head") is None + + assert lst["head"] == "foo" + assert lst["tail.head"] == "bar" + assert lst["tail.tail.head"] == "baz" + assert lst["tail.tail.tail.head"] is None + + lst.set("head", "new_foo") + lst.set("tail.head", "new_bar") + lst.set("tail.tail.head", "new_baz") + + assert lst.get("head") == "new_foo" + assert lst.get("tail.head") == "new_bar" + assert lst.get("tail.tail.head") == "new_baz" + + lst["head"] = "newer_foo" + lst["tail.head"] = "newer_bar" + lst["tail.tail.head"] = "newer_baz" + + assert lst["head"] == "newer_foo" + assert lst["tail.head"] == "newer_bar" + assert lst["tail.tail.head"] == "newer_baz" + + +def test_set_path_not_found(cas_with_references_xmi, webanno_typesystem_xml): + typesystem = load_typesystem(webanno_typesystem_xml) + cas = load_cas_from_xmi(cas_with_references_xmi, typesystem=typesystem) + + result = cas.select("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred") + assert len(result) == 1 + pred = result[0] + first_arg = pred.arguments[0] + + with pytest.raises(AttributeError): + first_arg.set("target.bar", 42) diff --git a/tests/test_xmi.py b/tests/test_xmi.py index 1602020..6d57749 100644 --- a/tests/test_xmi.py +++ b/tests/test_xmi.py @@ -19,6 +19,10 @@ (pytest.lazy_fixture("cas_with_reserved_names_xmi"), pytest.lazy_fixture("typesystem_with_reserved_names_xml")), (pytest.lazy_fixture("cas_with_two_sofas_xmi"), pytest.lazy_fixture("small_typesystem_xml")), (pytest.lazy_fixture("cas_with_smileys_xmi"), pytest.lazy_fixture("dkpro_typesystem_xml")), + ( + pytest.lazy_fixture("cas_has_fs_with_no_namespace_xmi"), + pytest.lazy_fixture("typesystem_has_types_with_no_namespace_xml"), + ), ] @@ -91,7 +95,7 @@ def test_deserializing_and_then_adding_annotations_works(small_xmi, small_typesy TokenType = typesystem.get_type("cassis.Token") cas = load_cas_from_xmi(small_xmi, typesystem=typesystem) - cas.add_annotation(TokenType(begin=0, end=3, 
id="0", pos="NNP")) + cas.add(TokenType(begin=0, end=3, id="0", pos="NNP")) # Check that serializing still works xmi = cas.to_xmi() @@ -194,7 +198,7 @@ def test_serializing_xmi_ignores_none_features(small_xmi, small_typesystem_xml): typesystem = load_typesystem(small_typesystem_xml) cas = load_cas_from_xmi(small_xmi, typesystem=typesystem) TokenType = typesystem.get_type("cassis.Token") - cas.add_annotation(TokenType(xmiID=13, sofa=1, begin=0, end=3, id=None, pos=None)) + cas.add(TokenType(xmiID=13, sofa=1, begin=0, end=3, id=None, pos=None)) actual_xml = cas.to_xmi() @@ -208,10 +212,10 @@ def test_serializing_xmi_namespaces_with_same_prefixes_but_different_urls_are_di BarType = typesystem.create_type("bar.test.Bar") # Check that two annotations of the same type get the same namespace - cas.add_annotation(FooType()) - cas.add_annotation(BarType()) - cas.add_annotation(FooType()) - cas.add_annotation(BarType()) + cas.add(FooType()) + cas.add(BarType()) + cas.add(FooType()) + cas.add(BarType()) actual_xmi = cas.to_xmi() root = etree.fromstring(actual_xmi.encode("utf-8")) @@ -221,6 +225,22 @@ def test_serializing_xmi_namespaces_with_same_prefixes_but_different_urls_are_di assert len(root.xpath("//test0:Bar", namespaces=root.nsmap)) == 2 +def test_serializing_with_unset_xmi_ids_works(): + typesystem = TypeSystem() + cas = Cas(typesystem) + FooType = typesystem.create_type("foo.test.Foo") + typesystem.create_feature(FooType, "bar", "bar.test.Bar") + BarType = typesystem.create_type("bar.test.Bar") + + # Check that two annotations of the same type get the same namespace + foo = FooType() + cas.add(foo) + foo.bar = BarType() + + # assert no error + cas.to_xmi(pretty_print=True) + + # UIMA vs cassis offsets