Skip to content

Commit

Permalink
Merge pull request #243 from dkpro/bugfix/242-Array-features-are-wrapped-in-extra-array
Browse files (browse the repository at this point in the history)

#242 - Array features are wrapped in extra array
  • Loading branch information
reckart authored Dec 15, 2021
2 parents 0753e05 + 4564886 commit 2a51991
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 7 deletions.
12 changes: 10 additions & 2 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@
from attr import validators
from sortedcontainers import SortedKeyList

from cassis.typesystem import TYPE_NAME_SOFA, FeatureStructure, TypeCheckError, TypeSystem, TYPE_NAME_FS_LIST, \
TYPE_NAME_FS_ARRAY, FEATURE_BASE_NAME_HEAD, TypeSystemMode
from cassis.typesystem import (
FEATURE_BASE_NAME_HEAD,
TYPE_NAME_FS_ARRAY,
TYPE_NAME_FS_LIST,
TYPE_NAME_SOFA,
FeatureStructure,
TypeCheckError,
TypeSystem,
TypeSystemMode,
)

_validator_optional_string = validators.optional(validators.instance_of(str))

Expand Down
1 change: 1 addition & 0 deletions cassis/typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@

_LIST_TYPES = _PRIMITIVE_LIST_TYPES | {TYPE_NAME_FS_LIST}


class TypeSystemMode(Enum):
"""How much type system information to include."""

Expand Down
18 changes: 13 additions & 5 deletions cassis/xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

from cassis.cas import Cas, IdGenerator, Sofa, View
from cassis.typesystem import (
_LIST_TYPES,
_PRIMITIVE_ARRAY_TYPES,
_PRIMITIVE_LIST_TYPES,
FEATURE_BASE_NAME_BEGIN,
FEATURE_BASE_NAME_END,
FEATURE_BASE_NAME_HEAD,
Expand All @@ -26,6 +28,7 @@
TYPE_NAME_EMPTY_FLOAT_LIST,
TYPE_NAME_EMPTY_FS_LIST,
TYPE_NAME_EMPTY_INTEGER_LIST,
TYPE_NAME_EMPTY_STRING_LIST,
TYPE_NAME_FLOAT,
TYPE_NAME_FLOAT_ARRAY,
TYPE_NAME_FLOAT_LIST,
Expand All @@ -39,16 +42,17 @@
TYPE_NAME_NON_EMPTY_FLOAT_LIST,
TYPE_NAME_NON_EMPTY_FS_LIST,
TYPE_NAME_NON_EMPTY_INTEGER_LIST,
TYPE_NAME_NON_EMPTY_STRING_LIST,
TYPE_NAME_SHORT,
TYPE_NAME_SHORT_ARRAY,
TYPE_NAME_SOFA,
TYPE_NAME_STRING,
TYPE_NAME_STRING_ARRAY,
TYPE_NAME_STRING_LIST,
FeatureStructure,
Type,
TypeNotFoundError,
TypeSystem, TYPE_NAME_STRING_LIST, TYPE_NAME_EMPTY_STRING_LIST, TYPE_NAME_NON_EMPTY_STRING_LIST,
_PRIMITIVE_LIST_TYPES, _LIST_TYPES,
TypeSystem,
)

NAN_VALUE = "NaN"
Expand Down Expand Up @@ -244,7 +248,9 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
continue

# Resolve references
if fs.type.name == TYPE_NAME_FS_ARRAY or feature.rangeType.name == TYPE_NAME_FS_ARRAY:
if fs.type.name == TYPE_NAME_FS_ARRAY or (
feature.rangeType.name == TYPE_NAME_FS_ARRAY and not feature.multipleReferencesAllowed
):
# An array of references is a list of integers separated
# by single spaces, e.g. <foo:bar elements="1 2 3 42" />
targets = []
Expand Down Expand Up @@ -627,7 +633,9 @@ def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: Featur
elem.attrib[feature_name] = " ".join(str(e.xmiID) for e in value.elements)
elif feature.rangeType.name == TYPE_NAME_FS_LIST and not feature.multipleReferencesAllowed:
if value is not None: # Compare to none to not skip if elements is empty!
elem.attrib[feature_name] = " ".join(str(e.xmiID) for e in self._collect_list_elements(feature.rangeType.name, value))
elem.attrib[feature_name] = " ".join(
str(e.xmiID) for e in self._collect_list_elements(feature.rangeType.name, value)
)
elif feature_name == FEATURE_BASE_NAME_SOFA:
elem.attrib[feature_name] = str(value.xmiID)
elif feature.rangeType.name == TYPE_NAME_BOOLEAN:
Expand Down Expand Up @@ -659,7 +667,7 @@ def _serialize_view(self, root: etree.Element, view: View):
elem.attrib["sofa"] = str(view.sofa.xmiID)
elem.attrib["members"] = " ".join(sorted((str(x.xmiID) for x in view.get_all_annotations()), key=int))

def _collect_list_elements(self, type_name: str, value) -> List[str]:
def _collect_list_elements(self, type_name: str, value) -> List[str]:
if type_name not in _LIST_TYPES:
raise ValueError(f"Not a primitive list: {type_name}")

Expand Down
28 changes: 28 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ def cas_with_list_features_xmi(cas_with_list_features_path):
return f.read()


# CAS with all kinds of array features


@pytest.fixture
def cas_with_array_features_path():
    """Return the path of the XMI fixture that contains array features.

    The path is built from ``FIXTURE_DIR``, the ``xmi`` sub-directory and the
    file name ``cas_with_array_features.xmi``.
    """
    path_parts = (FIXTURE_DIR, "xmi", "cas_with_array_features.xmi")
    return os.path.join(*path_parts)


@pytest.fixture
def cas_with_array_features_xmi(cas_with_array_features_path):
    """Return the text content of the array-features XMI fixture.

    Args:
        cas_with_array_features_path: Path of the XMI file to read, supplied
            by the fixture of the same name.

    Returns:
        The complete file content as a ``str``.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the fixture reads the same on every operating system.
    with open(cas_with_array_features_path, "r", encoding="utf-8") as f:
        return f.read()


# CAS with references


Expand Down Expand Up @@ -406,6 +420,20 @@ def typesystem_with_list_features_xml(typesystem_with_list_features_path):
return f.read()


# Type system with all kinds of array features


@pytest.fixture
def typesystem_with_array_features_path():
    """Return the path of the type system XML that declares array features.

    The path is built from ``FIXTURE_DIR``, the ``typesystems`` sub-directory
    and the file name ``typesystem_with_array_features.xml``.
    """
    path_parts = (FIXTURE_DIR, "typesystems", "typesystem_with_array_features.xml")
    return os.path.join(*path_parts)


@pytest.fixture
def typesystem_with_array_features_xml(typesystem_with_array_features_path):
    """Return the text content of the array-features type system XML.

    Args:
        typesystem_with_array_features_path: Path of the type system file to
            read, supplied by the fixture of the same name.

    Returns:
        The complete file content as a ``str``.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the fixture reads the same on every operating system.
    with open(typesystem_with_array_features_path, "r", encoding="utf-8") as f:
        return f.read()


# Annotations


Expand Down
117 changes: 117 additions & 0 deletions tests/test_files/typesystems/typesystem_with_array_features.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?xml version="1.0" encoding="UTF-8"?>
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
<types>
<typeDescription>
<name>uima.tcas.DocumentAnnotation</name>
<description/>
<supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
<name>language</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>FloatArrayHolder</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>floatArray</name>
<description/>
<rangeTypeName>uima.cas.FloatArray</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>FloatArrayHolderMR</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>floatArray</name>
<description/>
<rangeTypeName>uima.cas.FloatArray</rangeTypeName>
<multipleReferencesAllowed>true</multipleReferencesAllowed>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>FsArrayHolder</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>fsArray</name>
<description/>
<rangeTypeName>uima.cas.FSArray</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>FsArrayHolderMR</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>fsArray</name>
<description/>
<rangeTypeName>uima.cas.FSArray</rangeTypeName>
<multipleReferencesAllowed>true</multipleReferencesAllowed>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>IntArrayHolder</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>intArray</name>
<description/>
<rangeTypeName>uima.cas.IntegerArray</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>IntArrayHolderMR</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>intArray</name>
<description/>
<rangeTypeName>uima.cas.IntegerArray</rangeTypeName>
<multipleReferencesAllowed>true</multipleReferencesAllowed>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>StringArrayHolder</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>stringArray</name>
<description/>
<rangeTypeName>uima.cas.StringArray</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>StringArrayHolderMR</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>stringArray</name>
<description/>
<rangeTypeName>uima.cas.StringArray</rangeTypeName>
<multipleReferencesAllowed>true</multipleReferencesAllowed>
</featureDescription>
</features>
</typeDescription>
</types>
</typeSystemDescription>
26 changes: 26 additions & 0 deletions tests/test_files/xmi/cas_with_array_features.xmi
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:noNamespace="http:///uima/noNamespace.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0"/>
<cas:FSArray xmi:id="4" elements="2 3"/>
<noNamespace:FsArrayHolder xmi:id="6" fsArray="2 3"/>
<noNamespace:FsArrayHolderMR xmi:id="8" fsArray="7"/>
<noNamespace:IntArrayHolder xmi:id="10" intArray="1 2"/>
<noNamespace:IntArrayHolderMR xmi:id="12" intArray="11"/>
<noNamespace:FloatArrayHolder xmi:id="14" floatArray="-1.0 0.0 1.0 NaN -Infinity Infinity 1.4E-45 3.4028235E38"/>
<noNamespace:FloatArrayHolderMR xmi:id="16" floatArray="15"/>
<noNamespace:StringArrayHolder xmi:id="18">
<stringArray>blah</stringArray>
<stringArray>blub</stringArray>
</noNamespace:StringArrayHolder>
<noNamespace:StringArrayHolderMR xmi:id="20" stringArray="19"/>
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView"/>
<tcas:Annotation xmi:id="2" sofa="1" begin="0" end="1"/>
<tcas:Annotation xmi:id="3" sofa="1" begin="1" end="2"/>
<cas:FSArray xmi:id="7" elements="2 3"/>
<cas:IntegerArray xmi:id="11" elements="1 2"/>
<cas:FloatArray xmi:id="15" elements="-1.0 0.0 1.0 NaN -Infinity Infinity 1.4E-45 3.4028235E38"/>
<cas:StringArray xmi:id="19">
<elements>blah</elements>
<elements>blub</elements>
</cas:StringArray>
<cas:View sofa="1" members="4 6 8 10 12 14 16 18 20"/>
</xmi:XMI>
4 changes: 4 additions & 0 deletions tests/test_xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
pytest.lazy_fixture("cas_with_list_features_xmi"),
pytest.lazy_fixture("typesystem_with_list_features_xml"),
),
(
pytest.lazy_fixture("cas_with_array_features_xmi"),
pytest.lazy_fixture("typesystem_with_array_features_xml"),
),
]


Expand Down

0 comments on commit 2a51991

Please sign in to comment.