diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index a011e97..d02b528 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -2,9 +2,9 @@ name: Run Tests
on:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
jobs:
build:
diff --git a/README.rst b/README.rst
index 4ba8be3..9eaca6b 100644
--- a/README.rst
+++ b/README.rst
@@ -136,7 +136,7 @@ Given a type system with a type :code:`cassis.Token` that has an :code:`id` and
]
for token in tokens:
- cas.add_annotation(token)
+ cas.add(token)
Selecting annotations
~~~~~~~~~~~~~~~~~~~~~
@@ -158,16 +158,25 @@ Selecting annotations
# Annotation values can be accessed as properties
print('Token: begin={0}, end={1}, id={2}, pos={3}'.format(token.begin, token.end, token.id, token.pos))
-Selecting nested features
+Getting and setting (nested) features
~~~~~~~~~~~~~~~~~~~~~~~~~
-If you have nested feature structures, e.g. a feature structure with feature :code:`a` that has a
+If you want to access a feature but only have its name as a string, or if you have nested feature structures,
+e.g. a feature structure with feature :code:`a` that has a
feature :code:`b` that has a feature :code:`c`, some of which can be :code:`None`, then you can use the
following:
+.. code:: python
+
+ fs.get("var_name") # Or
+ fs["var_name"]
+
+Or in the nested case,
+
.. code:: python
fs.get("a.b.c")
+ fs["a.b.c"]
If :code:`a` or :code:`b` or :code:`c` are :code:`None`, then this returns instead of
@@ -183,6 +192,29 @@ Another example would be a StringList containing :code:`["Foo", "Bar", "Baz"]`:
assert lst.get("tail.tail.tail.head") == None
assert lst.get("tail.tail.tail.tail.head") == None
+The same goes for setting:
+
+.. code:: python
+
+ # Functional
+ lst.set("head", "new_foo")
+ lst.set("tail.head", "new_bar")
+ lst.set("tail.tail.head", "new_baz")
+
+ assert lst.get("head") == "new_foo"
+ assert lst.get("tail.head") == "new_bar"
+ assert lst.get("tail.tail.head") == "new_baz"
+
+ # Bracket access
+ lst["head"] = "newer_foo"
+ lst["tail.head"] = "newer_bar"
+ lst["tail.tail.head"] = "newer_baz"
+
+ assert lst["head"] == "newer_foo"
+ assert lst["tail.head"] == "newer_bar"
+ assert lst["tail.tail.head"] == "newer_baz"
+
+
Creating types and adding features
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -193,10 +225,10 @@ Creating types and adding features
typesystem = TypeSystem()
parent_type = typesystem.create_type(name='example.ParentType')
- typesystem.add_feature(type_=parent_type, name='parentFeature', rangeTypeName='String')
+ typesystem.create_feature(type_=parent_type, name='parentFeature', rangeTypeName='String')
child_type = typesystem.create_type(name='example.ChildType', supertypeName=parent_type.name)
- typesystem.add_feature(type_=child_type, name='childFeature', rangeTypeName='Integer')
+ typesystem.create_feature(type_=child_type, name='childFeature', rangeTypeName='Integer')
annotation = child_type(parentFeature='parent', childFeature='child')
@@ -242,7 +274,7 @@ as a :code:`Cas` .
cas = Cas()
cas.sofa_string = "I like cheese ."
- cas.add_annotations([
+ cas.add_all([
Token(begin=0, end=1),
Token(begin=2, end=6),
Token(begin=7, end=13),
@@ -255,7 +287,7 @@ as a :code:`Cas` .
view = cas.create_view('testView')
view.sofa_string = "I like blackcurrant ."
- view.add_annotations([
+ view.add_all([
Token(begin=0, end=1),
Token(begin=2, end=6),
Token(begin=7, end=19),
@@ -318,8 +350,8 @@ available as a member variable :code:`self_` or :code:`type_` on the respective
typesystem = TypeSystem()
ExampleType = typesystem.create_type(name='example.Type')
- typesystem.add_feature(type_=ExampleType, name='self', rangeTypeName='String')
- typesystem.add_feature(type_=ExampleType, name='type', rangeTypeName='String')
+ typesystem.create_feature(type_=ExampleType, name='self', rangeTypeName='String')
+ typesystem.create_feature(type_=ExampleType, name='type', rangeTypeName='String')
annotation = ExampleType(self_="Test string1", type_="Test string2")
diff --git a/cassis/cas.py b/cassis/cas.py
index 999771d..cccc427 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -270,7 +270,7 @@ def views(self) -> List[View]:
"""
return list(self._views.values())
- def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = True):
+ def add(self, annotation: FeatureStructure, keep_id: Optional[bool] = True):
"""Adds an annotation to this Cas.
Args:
@@ -294,7 +294,18 @@ def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] =
self._current_view.add_annotation_to_index(annotation)
- def add_annotations(self, annotations: Iterable[FeatureStructure]):
+ @deprecation.deprecated(details="Use add()")
+ def add_annotation(self, annotation: FeatureStructure, keep_id: Optional[bool] = True):
+ """Adds an annotation to this Cas.
+
+ Args:
+ annotation: The annotation to add.
+ keep_id: Keep the XMI id of `annotation` if true, else generate a new one.
+
+ """
+ self.add(annotation, keep_id)
+
+ def add_all(self, annotations: Iterable[FeatureStructure]):
"""Adds several annotations at once to this CAS.
Args:
@@ -302,9 +313,19 @@ def add_annotations(self, annotations: Iterable[FeatureStructure]):
"""
for annotation in annotations:
- self.add_annotation(annotation)
+ self.add(annotation)
- def remove_annotation(self, annotation: FeatureStructure):
+ @deprecation.deprecated(details="Use add_all()")
+ def add_annotations(self, annotations: Iterable[FeatureStructure]):
+ """Adds several annotations at once to this CAS.
+
+ Args:
+ annotations: An iterable of annotations to add.
+
+ """
+ self.add_all(annotations)
+
+ def remove(self, annotation: FeatureStructure):
"""Removes an annotation from an index. This throws if the
annotation was not present.
@@ -313,6 +334,16 @@ def remove_annotation(self, annotation: FeatureStructure):
"""
self._current_view.remove_annotation_from_index(annotation)
+ @deprecation.deprecated(details="Use remove()")
+ def remove_annotation(self, annotation: FeatureStructure):
+ """Removes an annotation from an index. This throws if the
+ annotation was not present.
+
+ Args:
+ annotation: The annotation to remove.
+ """
+ self.remove(annotation)
+
@deprecation.deprecated(details="Use annotation.get_covered_text()")
def get_covered_text(self, annotation: FeatureStructure) -> str:
"""Gets the text that is covered by `annotation`.
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index fcac6f6..dc22017 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -7,6 +7,7 @@
from typing import IO, Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Union
import attr
+import deprecation
from lxml import etree
from more_itertools import unique_everseen
from toposort import toposort_flatten
@@ -205,6 +206,13 @@ def get_covered_text(self) -> str:
raise NotImplementedError()
def get(self, path: str) -> Optional[Any]:
+ """ Recursively gets an attribute, e.g. fs.get("a.b.c") would return attribute `c` of `b` of `a`.
+
+ If you have nested feature structures, e.g. a feature structure with feature `a` that has a feature `b` that
+ has a feature `c`, some of which can be `None`, then you can use the following:
+
+ fs.get("a.b.c")
+ """
cur = self
for part in path.split("."):
cur = getattr(cur, part, None)
@@ -213,6 +221,31 @@ def get(self, path: str) -> Optional[Any]:
return cur
+ def set(self, path: str, value: Any):
+ """ Recursively sets an attribute, e.g. fs.set("a.b.c", 42) would set attribute `c` of `b` of `a` to `42`. """
+
+ if "." not in path:
+ setattr(self, path, value)
+ return
+
+ idx = path.rindex(".")
+
+ value_name = path[idx + 1 :]
+ path = path[:idx]
+
+ target = self.get(path)
+
+ if target is None:
+ raise AttributeError(f"Attribute with name [{value_name}] not found on: {target}")
+
+ setattr(target, value_name, value)
+
+ def __getitem__(self, key):
+ return self.get(key)
+
+ def __setitem__(self, key, value):
+ return self.set(key, value)
+
def __hash__(self):
return self.xmiID
@@ -432,7 +465,7 @@ def __init__(self, add_document_annotation_type: bool = True):
# Array
t = self.create_type(name="uima.cas.ArrayBase", supertypeName="uima.cas.TOP")
- self.add_feature(t, name="elements", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
+ self.create_feature(t, name="elements", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
self.create_type(name="uima.cas.FSArray", supertypeName="uima.cas.ArrayBase")
self.create_type(name="uima.cas.BooleanArray", supertypeName="uima.cas.ArrayBase")
@@ -449,52 +482,47 @@ def __init__(self, add_document_annotation_type: bool = True):
self.create_type(name="uima.cas.FSList", supertypeName="uima.cas.ListBase")
self.create_type(name="uima.cas.EmptyFSList", supertypeName="uima.cas.FSList")
t = self.create_type(name="uima.cas.NonEmptyFSList", supertypeName="uima.cas.FSList")
- self.add_feature(t, name="head", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
- self.add_feature(t, name="tail", rangeTypeName="uima.cas.FSList", multipleReferencesAllowed=True)
+ self.create_feature(t, name="head", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
+ self.create_feature(t, name="tail", rangeTypeName="uima.cas.FSList", multipleReferencesAllowed=True)
# FloatList
self.create_type(name="uima.cas.FloatList", supertypeName="uima.cas.ListBase")
self.create_type(name="uima.cas.EmptyFloatList", supertypeName="uima.cas.FloatList")
t = self.create_type(name="uima.cas.NonEmptyFloatList", supertypeName="uima.cas.FloatList")
- self.add_feature(t, name="head", rangeTypeName="uima.cas.Float")
- self.add_feature(t, name="tail", rangeTypeName="uima.cas.FloatList", multipleReferencesAllowed=True)
+ self.create_feature(t, name="head", rangeTypeName="uima.cas.Float")
+ self.create_feature(t, name="tail", rangeTypeName="uima.cas.FloatList", multipleReferencesAllowed=True)
# IntegerList
self.create_type(name="uima.cas.IntegerList", supertypeName="uima.cas.ListBase")
self.create_type(name="uima.cas.EmptyIntegerList", supertypeName="uima.cas.IntegerList")
t = self.create_type(name="uima.cas.NonEmptyIntegerList", supertypeName="uima.cas.IntegerList")
- self.add_feature(t, name="head", rangeTypeName="uima.cas.Integer")
- self.add_feature(t, name="tail", rangeTypeName="uima.cas.IntegerList", multipleReferencesAllowed=True)
+ self.create_feature(t, name="head", rangeTypeName="uima.cas.Integer")
+ self.create_feature(t, name="tail", rangeTypeName="uima.cas.IntegerList", multipleReferencesAllowed=True)
# StringList
self.create_type(name="uima.cas.StringList", supertypeName="uima.cas.ListBase")
self.create_type(name="uima.cas.EmptyStringList", supertypeName="uima.cas.StringList")
t = self.create_type(name="uima.cas.NonEmptyStringList", supertypeName="uima.cas.StringList")
- self.add_feature(t, name="head", rangeTypeName="uima.cas.String")
- self.add_feature(t, name="tail", rangeTypeName="uima.cas.StringList", multipleReferencesAllowed=True)
+ self.create_feature(t, name="head", rangeTypeName="uima.cas.String")
+ self.create_feature(t, name="tail", rangeTypeName="uima.cas.StringList", multipleReferencesAllowed=True)
# Sofa
t = self.create_type(name="uima.cas.Sofa", supertypeName="uima.cas.TOP")
- self.add_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer")
- self.add_feature(t, name="sofaID", rangeTypeName="uima.cas.String")
- self.add_feature(t, name="mimeType", rangeTypeName="uima.cas.String")
- self.add_feature(
- t,
- name="sofaArray",
- rangeTypeName="uima.cas.TOP",
- multipleReferencesAllowed=True,
- )
- self.add_feature(t, name="sofaString", rangeTypeName="uima.cas.String")
- self.add_feature(t, name="sofaURI", rangeTypeName="uima.cas.String")
+ self.create_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer")
+ self.create_feature(t, name="sofaID", rangeTypeName="uima.cas.String")
+ self.create_feature(t, name="mimeType", rangeTypeName="uima.cas.String")
+ self.create_feature(t, name="sofaArray", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
+ self.create_feature(t, name="sofaString", rangeTypeName="uima.cas.String")
+ self.create_feature(t, name="sofaURI", rangeTypeName="uima.cas.String")
# AnnotationBase
t = self.create_type(name="uima.cas.AnnotationBase", supertypeName="uima.cas.TOP")
- self.add_feature(t, name="sofa", rangeTypeName="uima.cas.Sofa")
+ self.create_feature(t, name="sofa", rangeTypeName="uima.cas.Sofa")
# Annotation
t = self.create_type(name="uima.tcas.Annotation", supertypeName="uima.cas.AnnotationBase")
- self.add_feature(t, name="begin", rangeTypeName="uima.cas.Integer")
- self.add_feature(t, name="end", rangeTypeName="uima.cas.Integer")
+ self.create_feature(t, name="begin", rangeTypeName="uima.cas.Integer")
+ self.create_feature(t, name="end", rangeTypeName="uima.cas.Integer")
if add_document_annotation_type:
self._add_document_annotation_type()
@@ -646,7 +674,7 @@ def subsumes(self, parent_name: str, child_name: str) -> bool:
return False
- def add_feature(
+ def create_feature(
self,
type_: Type,
name: str,
@@ -692,6 +720,32 @@ def add_feature(
type_.add_feature(feature)
+ @deprecation.deprecated(details="Use create_feature")
+ def add_feature(
+ self,
+ type_: Type,
+ name: str,
+ rangeTypeName: str,
+ elementType: str = None,
+ description: str = None,
+ multipleReferencesAllowed: bool = None,
+ ):
+ """Adds a feature to the given type.
+ Args:
+ type_: The type to which the feature will be added
+ name: The name of the new feature
+ rangeTypeName: The feature's rangeTypeName specifies the type of value that the feature can take.
+ elementType: The elementType of a feature is optional, and applies only when the rangeTypeName
+ is uima.cas.FSArray or uima.cas.FSList The elementType specifies what type of value can be
+ assigned as an element of the array or list.
+ description: The description of the new feature
+ multipleReferencesAllowed: Setting this to true indicates that the array or list may be shared,
+ so changes to it may affect other objects in the CAS.
+ Raises:
+ Exception: If a feature with name `name` already exists in `type_`.
+ """
+ self.create_feature(type_, name, rangeTypeName, elementType, description, multipleReferencesAllowed)
+
def to_xml(self, path: Union[str, Path, None] = None) -> Optional[str]:
"""Creates a XMI representation of this type system.
@@ -751,7 +805,7 @@ def _defines_predefined_type(self, type_name):
def _add_document_annotation_type(self):
t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName="uima.tcas.Annotation")
- self.add_feature(t, name="language", rangeTypeName="uima.cas.String")
+ self.create_feature(t, name="language", rangeTypeName="uima.cas.String")
# Deserializing
@@ -803,12 +857,6 @@ def deserialize(self, source: Union[IO, str]) -> TypeSystem:
description = self._get_elem_as_str(elem.find("{*}description"))
supertypeName = self._get_elem_as_str(elem.find("{*}supertypeName"))
- if "." not in type_name:
- type_name = "uima.noNamespace." + type_name
-
- if "." not in supertypeName:
- supertypeName = "uima.noNamespace." + supertypeName
-
types[type_name] = Type(name=type_name, supertypeName=supertypeName, description=description)
type_dependencies[type_name].add(supertypeName)
@@ -886,7 +934,7 @@ def deserialize(self, source: Union[IO, str]) -> TypeSystem:
# between type references in inheritance and type references in range or element type.
for t in created_types:
for f in features[t.name]:
- ts.add_feature(
+ ts.create_feature(
t,
name=f.name,
rangeTypeName=f.rangeTypeName,
@@ -944,20 +992,13 @@ def _serialize_type(self, xf: IO, type_: Type):
typeDescription = etree.Element("typeDescription")
name = etree.SubElement(typeDescription, "name")
- type_name = type_.name
- if type_name.startswith("uima.noNamespace."):
- type_name = type_name.replace("uima.noNamespace.", "")
-
- name.text = type_name
+ name.text = type_.name
description = etree.SubElement(typeDescription, "description")
description.text = type_.description
supertype_name_node = etree.SubElement(typeDescription, "supertypeName")
- supertype_name = type_.supertypeName
- if supertype_name.startswith("uima.noNamespace."):
- supertype_name = supertype_name.replace("uima.noNamespace.", "")
- supertype_name_node.text = supertype_name
+ supertype_name_node.text = type_.supertypeName
# Only create the `feature` element if there is at least one feature
feature_list = list(type_.features)
diff --git a/cassis/xmi.py b/cassis/xmi.py
index 8656c9d..2e2630a 100644
--- a/cassis/xmi.py
+++ b/cassis/xmi.py
@@ -235,7 +235,7 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
fs.begin = sofa._offset_converter.uima_to_cassis(fs.begin)
fs.end = sofa._offset_converter.uima_to_cassis(fs.end)
- view.add_annotation(fs, keep_id=True)
+ view.add(fs, keep_id=True)
cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1)
cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1)
@@ -262,9 +262,12 @@ def _parse_view(self, elem) -> ProtoView:
def _parse_feature_structure(self, typesystem: TypeSystem, elem, children: Dict[str, List[str]]):
# Strip the http prefix, replace / with ., remove the ecore part
# TODO: Error checking
- typename = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip()
+ type_name: str = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip()
- AnnotationType = typesystem.get_type(typename)
+ if type_name.startswith("uima.noNamespace."):
+ type_name = type_name[17:]
+
+ AnnotationType = typesystem.get_type(type_name)
attributes = dict(elem.attrib)
attributes.update(children)
@@ -339,6 +342,11 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
self._serialize_cas_null(root)
+ # Generate XMI ids for feature structures that do not have one yet
+ for fs in cas._find_all_fs():
+ if fs.xmiID is None:
+ fs.xmiID = cas._get_next_xmi_id()
+
# Find all fs, even the ones that are not directly added to a sofa
for fs in sorted(cas._find_all_fs(), key=lambda a: a.xmiID):
self._serialize_feature_structure(cas, root, fs)
@@ -363,8 +371,12 @@ def _serialize_cas_null(self, root: etree.Element):
def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: FeatureStructure):
ts = cas.typesystem
+ type_name = fs.type
+ if "." not in type_name:
+ type_name = f"uima.noNamespace.{type_name}"
+
# The type name is a Java package, e.g. `org.myproj.Foo`.
- parts = fs.type.split(".")
+ parts = type_name.split(".")
# The CAS type namespace is converted to an XML namespace URI by the following rule:
# replace all dots with slashes, prepend http:///, and append .ecore.
diff --git a/tests/fixtures.py b/tests/fixtures.py
index e13ed24..3a670f3 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -160,6 +160,20 @@ def cas_with_leniency_xmi(cas_with_leniency_path):
return f.read()
+# CAS with feature structures whose types have no namespace
+
+
+@pytest.fixture
+def cas_has_fs_with_no_namespace_path():
+ return os.path.join(FIXTURE_DIR, "xmi", "cas_has_fs_with_no_namespace.xmi")
+
+
+@pytest.fixture
+def cas_has_fs_with_no_namespace_xmi(cas_has_fs_with_no_namespace_path):
+ with open(cas_has_fs_with_no_namespace_path, "r") as f:
+ return f.read()
+
+
# Small type system
@@ -334,7 +348,7 @@ def tokens(small_typesystem_xml):
]
for token in tokens:
- cas.add_annotation(token)
+ cas.add(token)
return tokens
@@ -350,6 +364,6 @@ def sentences(small_typesystem_xml):
sentences = [SentenceType(begin=0, end=26, id="0"), SentenceType(begin=27, end=47, id="1")]
for sentence in sentences:
- cas.add_annotation(sentence)
+ cas.add(sentence)
return sentences
diff --git a/tests/test_cas.py b/tests/test_cas.py
index 2c1986b..11ad0a2 100644
--- a/tests/test_cas.py
+++ b/tests/test_cas.py
@@ -112,7 +112,7 @@ def test_sofa_uri_can_be_set_and_read():
def test_select(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(tokens + sentences)
+ cas.add_all(tokens + sentences)
actual_tokens = list(cas.select("cassis.Token"))
actual_sentences = list(cas.select("cassis.Sentence"))
@@ -124,7 +124,7 @@ def test_select(small_typesystem_xml, tokens, sentences):
def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
annotations = tokens + sentences
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(annotations)
+ cas.add_all(annotations)
actual_annotations = list(cas.select("uima.tcas.Annotation"))
@@ -133,7 +133,7 @@ def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sent
def test_select_covered(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(tokens + sentences)
+ cas.add_all(tokens + sentences)
first_sentence, second_sentence = sentences
tokens_in_first_sentence = tokens[:6]
tokens_in_second_sentence = tokens[6:]
@@ -153,8 +153,8 @@ def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences):
sentence = SentenceType(begin=0, end=10)
annotations = [AnnotationType(begin=0, end=5), AnnotationType(begin=0, end=5)]
- cas.add_annotation(sentence)
- cas.add_annotations(annotations)
+ cas.add(sentence)
+ cas.add_all(annotations)
actual_annotations = list(cas.select_covered("test.Annotation", sentence))
@@ -172,7 +172,7 @@ def test_select_covered_also_returns_parent_instances(small_typesystem_xml, toke
annotations.append(subtoken2)
cas = Cas(typesystem=typesystem)
- cas.add_annotations(annotations)
+ cas.add_all(annotations)
first_sentence, second_sentence = sentences
tokens_in_first_sentence = tokens[:6]
@@ -187,7 +187,7 @@ def test_select_covered_also_returns_parent_instances(small_typesystem_xml, toke
def test_select_covering(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(tokens + sentences)
+ cas.add_all(tokens + sentences)
actual_first_sentence, actual_second_sentence = sentences
tokens_in_first_sentence = tokens[:6]
tokens_in_second_sentence = tokens[6:]
@@ -219,7 +219,7 @@ def test_select_covering_also_returns_parent_instances(small_typesystem_xml, tok
subsentence2 = SubSentenceType(begin=second_sentence.begin, end=second_sentence.end)
annotations.append(subsentence1)
annotations.append(subsentence2)
- cas.add_annotations(annotations)
+ cas.add_all(annotations)
tokens_in_first_sentence = tokens[:6]
tokens_in_second_sentence = tokens[6:]
@@ -237,9 +237,9 @@ def test_select_covering_also_returns_parent_instances(small_typesystem_xml, tok
def test_select_only_returns_annotations_of_current_view(tokens, sentences, small_typesystem_xml):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(tokens)
+ cas.add_all(tokens)
view = cas.create_view("testView")
- view.add_annotations(sentences)
+ view.add_all(sentences)
actual_annotations_in_initial_view = list(cas.get_view("_InitialView").select_all())
actual_annotations_in_test_view = list(cas.get_view("testView").select_all())
@@ -257,47 +257,6 @@ def test_select_returns_feature_structures(cas_with_collections_xmi, typesystem_
assert len(arrs) == 1
-# Get with path selector
-
-
-def test_get_path_semargs(cas_with_references_xmi, webanno_typesystem_xml):
- typesystem = load_typesystem(webanno_typesystem_xml)
- cas = load_cas_from_xmi(cas_with_references_xmi, typesystem=typesystem)
-
- result = cas.select("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred")
- assert len(result) == 1
- pred = result[0]
- first_arg = pred.arguments[0]
-
- end = first_arg.get("target.end")
-
- assert end == 5
-
-
-def test_get_path_stringlist():
- cas = Cas()
-
- NonEmptyStringList = cas.typesystem.get_type("uima.cas.NonEmptyStringList")
- EmptyStringList = cas.typesystem.get_type("uima.cas.EmptyStringList")
-
- data = ["foo", "bar", "baz"]
- lst = NonEmptyStringList()
-
- cur = lst
- for s in data:
- cur.head = s
- cur.tail = NonEmptyStringList()
- cur = cur.tail
-
- cur.tail = EmptyStringList()
- print(lst)
-
- assert lst.get("head") == "foo"
- assert lst.get("tail.head") == "bar"
- assert lst.get("tail.tail.head") == "baz"
- assert lst.get("tail.tail.tail.head") is None
-
-
# Covered text
@@ -359,7 +318,7 @@ def test_add_annotation(small_typesystem_xml):
TokenType(begin=25, end=26, id="5", pos="."),
]
for token in tokens:
- cas.add_annotation(token)
+ cas.add(token)
actual_tokens = list(cas.select(TokenType.name))
assert actual_tokens == tokens
@@ -379,7 +338,7 @@ def test_add_annotation_generates_ids(small_typesystem_xml, tokens):
TokenType(begin=25, end=26, id="5", pos="."),
]
for token in tokens:
- cas.add_annotation(token)
+ cas.add(token)
actual_tokens = list(cas.select(TokenType.name))
assert all([token.xmiID is not None for token in actual_tokens])
@@ -393,7 +352,7 @@ def test_annotations_are_ordered_correctly(small_typesystem_xml, tokens):
random.shuffle(list(annotations))
for token in annotations:
- cas.add_annotation(token)
+ cas.add(token)
actual_tokens = list(cas.select("cassis.Token"))
@@ -408,7 +367,7 @@ def test_leniency_type_not_in_typeystem_not_lenient(small_typesystem_xml):
cas = Cas()
with pytest.raises(RuntimeError, match="Typesystem of CAS does not contain type"):
- cas.add_annotation(token)
+ cas.add(token)
def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
@@ -418,7 +377,7 @@ def test_leniency_type_not_in_typeystem_lenient(small_typesystem_xml):
token = TokenType(begin=0, end=3, id="0", pos="NNP")
cas = Cas(lenient=True)
- cas.add_annotation(token)
+ cas.add(token)
def test_select_returns_children_fs_instances(cas_with_inheritance_xmi, typesystem_with_inheritance_xml):
@@ -439,16 +398,16 @@ def test_select_returns_children_fs_instances(cas_with_inheritance_xmi, typesyst
def test_removing_of_existing_fs_works(small_typesystem_xml, tokens, sentences):
annotations = tokens + sentences
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(annotations)
+ cas.add_all(annotations)
for token in tokens:
- cas.remove_annotation(token)
+ cas.remove(token)
actual_annotations = list(cas.select("uima.tcas.Annotation"))
assert set(actual_annotations) == set(sentences)
for sentence in sentences:
- cas.remove_annotation(sentence)
+ cas.remove(sentence)
actual_annotations = list(cas.select("uima.tcas.Annotation"))
assert set(actual_annotations) == set()
@@ -459,11 +418,11 @@ def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
view = cas.create_view("testView")
- cas.add_annotations(annotations)
- view.add_annotations(annotations)
+ cas.add_all(annotations)
+ view.add_all(annotations)
for annotation in annotations:
- cas.remove_annotation(annotation)
+ cas.remove(annotation)
assert set(cas.select("uima.tcas.Annotation")) == set()
assert set(view.select("uima.tcas.Annotation")) == set(annotations)
@@ -473,14 +432,14 @@ def test_removing_throws_if_fs_not_found(small_typesystem_xml, tokens, sentences
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
with pytest.raises(ValueError):
- cas.remove_annotation(tokens[0])
+ cas.remove(tokens[0])
def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
- cas.add_annotations(tokens)
+ cas.add_all(tokens)
view = cas.create_view("testView")
with pytest.raises(ValueError):
- view.remove_annotation(tokens[0])
+ view.remove(tokens[0])
diff --git a/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml b/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml
index 6805a8d..245aa76 100644
--- a/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml
+++ b/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml
@@ -2,41 +2,18 @@
- ArtifactID
+ TypeWithNoNameSpace
A unique artifact identifier.
- uima.cas.TOP
+ uima.tcas.Annotation
- artifactID
- A unique identification string for the artifact. This should be the file name for files,
- or the unique identifier used in a database if the document source is a database
- collection reader.
+ testFeature
+ I am the testFeatureDescription
uima.cas.String
-
- ArtifactMetadata
- A piece of metadata about the artifact in the form of a key value pair.
- uima.cas.TOP
-
-
- key
-
- uima.cas.String
-
-
- value
-
- uima.cas.String
-
-
-
-
- ChildOfArtifactID
- ArtifactID
-
-
+
diff --git a/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi b/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi
new file mode 100644
index 0000000..ed9748b
--- /dev/null
+++ b/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/test_typesystem.py b/tests/test_typesystem.py
index 5e4ba66..39eaf5d 100644
--- a/tests/test_typesystem.py
+++ b/tests/test_typesystem.py
@@ -27,7 +27,7 @@ def test_feature_can_be_added():
typesystem = TypeSystem()
test_type = typesystem.create_type(name="test.Type")
- typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
+ typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
actual_type = typesystem.get_type("test.Type")
actual_feature = actual_type.get_feature("testFeature")
@@ -41,9 +41,9 @@ def test_feature_adding_warns_if_redefined_identically():
test_type = typesystem.create_type(name="test.Type")
- typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
+ typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
with pytest.warns(UserWarning):
- typesystem.add_feature(
+ typesystem.create_feature(
type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature"
)
@@ -52,10 +52,10 @@ def test_feature_adding_throws_if_redefined_differently():
typesystem = TypeSystem()
test_type = typesystem.create_type(name="test.Type")
- typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
+ typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
with pytest.raises(ValueError):
- typesystem.add_feature(
+ typesystem.create_feature(
type_=test_type, name="testFeature", rangeTypeName="Boolean", description="A test feature"
)
@@ -75,7 +75,7 @@ def test_type_can_be_created():
def test_type_can_create_instances():
typesystem = TypeSystem()
test_type = typesystem.create_type(name="test.Type")
- typesystem.add_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
+ typesystem.create_feature(type_=test_type, name="testFeature", rangeTypeName="String", description="A test feature")
annotation = test_type(begin=0, end=42, testFeature="testValue")
@@ -88,10 +88,10 @@ def test_type_can_create_instance_with_inherited_fields():
typesystem = TypeSystem()
parent_type = typesystem.create_type(name="test.ParentType")
- typesystem.add_feature(type_=parent_type, name="parentFeature", rangeTypeName="String")
+ typesystem.create_feature(type_=parent_type, name="parentFeature", rangeTypeName="String")
child_type = typesystem.create_type(name="test.ChildType", supertypeName=parent_type.name)
- typesystem.add_feature(type_=child_type, name="childFeature", rangeTypeName="Integer")
+ typesystem.create_feature(type_=child_type, name="childFeature", rangeTypeName="Integer")
annotation = child_type(parentFeature="parent", childFeature="child")
@@ -500,7 +500,7 @@ def test_that_merging_compatible_typesystem_works(name, rangeTypeName, elementTy
ts = TypeSystem()
t = ts.create_type("test.ArraysAndListsWithElementTypes", supertypeName="uima.cas.TOP")
- ts.add_feature(
+ ts.create_feature(
type_=t,
name=name,
rangeTypeName=rangeTypeName,
@@ -533,7 +533,7 @@ def test_that_merging_incompatible_typesystem_throws(name, rangeTypeName, elemen
ts = TypeSystem()
t = ts.create_type("test.ArraysAndListsWithElementTypes", supertypeName="uima.cas.TOP")
- ts.add_feature(
+ ts.create_feature(
type_=t,
name=name,
rangeTypeName=rangeTypeName,
@@ -596,8 +596,8 @@ def test_typchecking_fs_array():
MyOtherValue = cas.typesystem.create_type(name="test.MyOtherValue", supertypeName="uima.cas.TOP")
MyCollection = cas.typesystem.create_type("test.MyCollection", supertypeName="uima.cas.TOP")
- cas.typesystem.add_feature(type_=MyValue, name="value", rangeTypeName="uima.cas.String")
- cas.typesystem.add_feature(
+ cas.typesystem.create_feature(type_=MyValue, name="value", rangeTypeName="uima.cas.String")
+ cas.typesystem.create_feature(
type_=MyCollection, name="members", rangeTypeName="uima.cas.FSArray", elementType="test.MyValue"
)
@@ -605,7 +605,7 @@ def test_typchecking_fs_array():
collection = MyCollection(members=members)
- cas.add_annotation(collection)
+ cas.add(collection)
errors = cas.typecheck()
@@ -614,3 +614,80 @@ def test_typchecking_fs_array():
2, "Member of [uima.cas.FSArray] has unsound type: was [test.MyOtherValue], need [test.MyValue]!"
)
assert errors[0] == expected_error
+
+
+# Getting/Setting with path selector
+
+
+def test_get_set_path_semargs(cas_with_references_xmi, webanno_typesystem_xml):
+ typesystem = load_typesystem(webanno_typesystem_xml)
+ cas = load_cas_from_xmi(cas_with_references_xmi, typesystem=typesystem)
+
+ result = cas.select("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred")
+ assert len(result) == 1  # the fixture is expected to contain exactly one SemPred
+ pred = result[0]
+ first_arg = pred.arguments[0]
+
+ assert first_arg.get("target.end") == 5  # dotted path: feature "end" of the value held in "target"
+ first_arg.set("target.end", 42)
+ assert first_arg.get("target.end") == 42
+
+ assert first_arg["target.end"] == 42  # subscript access is expected to accept the same dotted path
+ first_arg["target.end"] = 23
+ assert first_arg["target.end"] == 23
+
+
+def test_get_set_path_stringlist():
+ cas = Cas()
+
+ NonEmptyStringList = cas.typesystem.get_type("uima.cas.NonEmptyStringList")  # looked up on the typesystem of a Cas built without arguments
+ EmptyStringList = cas.typesystem.get_type("uima.cas.EmptyStringList")
+
+ data = ["foo", "bar", "baz"]
+ lst = NonEmptyStringList()
+
+ cur = lst  # cursor that advances to the newest node while the linked list is built
+ for s in data:
+ cur.head = s
+ cur.tail = NonEmptyStringList()
+ cur = cur.tail
+ cur.tail = EmptyStringList()  # NOTE(review): cur is a fourth node whose head was never assigned
+
+ assert lst.get("head") == "foo"
+ assert lst.get("tail.head") == "bar"
+ assert lst.get("tail.tail.head") == "baz"
+ assert lst.get("tail.tail.tail.head") is None  # relies on the extra node left behind by the loop above
+
+ assert lst["head"] == "foo"
+ assert lst["tail.head"] == "bar"
+ assert lst["tail.tail.head"] == "baz"
+ assert lst["tail.tail.tail.head"] is None
+
+ lst.set("head", "new_foo")  # method-style setter, one call per nesting depth
+ lst.set("tail.head", "new_bar")
+ lst.set("tail.tail.head", "new_baz")
+
+ assert lst.get("head") == "new_foo"
+ assert lst.get("tail.head") == "new_bar"
+ assert lst.get("tail.tail.head") == "new_baz"
+
+ lst["head"] = "newer_foo"  # subscript-style setter over the same paths
+ lst["tail.head"] = "newer_bar"
+ lst["tail.tail.head"] = "newer_baz"
+
+ assert lst["head"] == "newer_foo"
+ assert lst["tail.head"] == "newer_bar"
+ assert lst["tail.tail.head"] == "newer_baz"
+
+
+def test_set_path_not_found(cas_with_references_xmi, webanno_typesystem_xml):
+ typesystem = load_typesystem(webanno_typesystem_xml)
+ cas = load_cas_from_xmi(cas_with_references_xmi, typesystem=typesystem)
+
+ result = cas.select("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred")
+ assert len(result) == 1  # the fixture is expected to contain exactly one SemPred
+ pred = result[0]
+ first_arg = pred.arguments[0]
+
+ with pytest.raises(AttributeError):  # the last path segment "bar" is expected not to resolve on the target
+ first_arg.set("target.bar", 42)
diff --git a/tests/test_xmi.py b/tests/test_xmi.py
index 1602020..6d57749 100644
--- a/tests/test_xmi.py
+++ b/tests/test_xmi.py
@@ -19,6 +19,10 @@
(pytest.lazy_fixture("cas_with_reserved_names_xmi"), pytest.lazy_fixture("typesystem_with_reserved_names_xml")),
(pytest.lazy_fixture("cas_with_two_sofas_xmi"), pytest.lazy_fixture("small_typesystem_xml")),
(pytest.lazy_fixture("cas_with_smileys_xmi"), pytest.lazy_fixture("dkpro_typesystem_xml")),
+ (
+ pytest.lazy_fixture("cas_has_fs_with_no_namespace_xmi"),
+ pytest.lazy_fixture("typesystem_has_types_with_no_namespace_xml"),
+ ),
]
@@ -91,7 +95,7 @@ def test_deserializing_and_then_adding_annotations_works(small_xmi, small_typesy
TokenType = typesystem.get_type("cassis.Token")
cas = load_cas_from_xmi(small_xmi, typesystem=typesystem)
- cas.add_annotation(TokenType(begin=0, end=3, id="0", pos="NNP"))
+ cas.add(TokenType(begin=0, end=3, id="0", pos="NNP"))
# Check that serializing still works
xmi = cas.to_xmi()
@@ -194,7 +198,7 @@ def test_serializing_xmi_ignores_none_features(small_xmi, small_typesystem_xml):
typesystem = load_typesystem(small_typesystem_xml)
cas = load_cas_from_xmi(small_xmi, typesystem=typesystem)
TokenType = typesystem.get_type("cassis.Token")
- cas.add_annotation(TokenType(xmiID=13, sofa=1, begin=0, end=3, id=None, pos=None))
+ cas.add(TokenType(xmiID=13, sofa=1, begin=0, end=3, id=None, pos=None))
actual_xml = cas.to_xmi()
@@ -208,10 +212,10 @@ def test_serializing_xmi_namespaces_with_same_prefixes_but_different_urls_are_di
BarType = typesystem.create_type("bar.test.Bar")
# Check that two annotations of the same type get the same namespace
- cas.add_annotation(FooType())
- cas.add_annotation(BarType())
- cas.add_annotation(FooType())
- cas.add_annotation(BarType())
+ cas.add(FooType())
+ cas.add(BarType())
+ cas.add(FooType())
+ cas.add(BarType())
actual_xmi = cas.to_xmi()
root = etree.fromstring(actual_xmi.encode("utf-8"))
@@ -221,6 +225,22 @@ def test_serializing_xmi_namespaces_with_same_prefixes_but_different_urls_are_di
assert len(root.xpath("//test0:Bar", namespaces=root.nsmap)) == 2
+def test_serializing_with_unset_xmi_ids_works():
+ typesystem = TypeSystem()
+ cas = Cas(typesystem)
+ FooType = typesystem.create_type("foo.test.Foo")
+ typesystem.create_feature(FooType, "bar", "bar.test.Bar")  # range type is named here before it is created on the next line
+ BarType = typesystem.create_type("bar.test.Bar")
+
+ # Only foo is added to the CAS; the Bar instance it references is never added explicitly
+ foo = FooType()
+ cas.add(foo)
+ foo.bar = BarType()
+
+ # Serialization must complete without raising; the produced XMI is not inspected
+ cas.to_xmi(pretty_print=True)
+
+
# UIMA vs cassis offsets