diff --git a/cassis/cas.py b/cassis/cas.py index d786543..c08da1f 100644 --- a/cassis/cas.py +++ b/cassis/cas.py @@ -8,8 +8,16 @@ from attr import validators from sortedcontainers import SortedKeyList -from cassis.typesystem import TYPE_NAME_SOFA, FeatureStructure, TypeCheckError, TypeSystem, TYPE_NAME_FS_LIST, \ - TYPE_NAME_FS_ARRAY, FEATURE_BASE_NAME_HEAD, TypeSystemMode +from cassis.typesystem import ( + FEATURE_BASE_NAME_HEAD, + TYPE_NAME_FS_ARRAY, + TYPE_NAME_FS_LIST, + TYPE_NAME_SOFA, + FeatureStructure, + TypeCheckError, + TypeSystem, + TypeSystemMode, +) _validator_optional_string = validators.optional(validators.instance_of(str)) diff --git a/cassis/typesystem.py b/cassis/typesystem.py index 12d8122..436fc13 100644 --- a/cassis/typesystem.py +++ b/cassis/typesystem.py @@ -189,6 +189,7 @@ _LIST_TYPES = _PRIMITIVE_LIST_TYPES | {TYPE_NAME_FS_LIST} + class TypeSystemMode(Enum): """How much type system information to include.""" diff --git a/cassis/xmi.py b/cassis/xmi.py index fc6f2d7..cdc95d3 100644 --- a/cassis/xmi.py +++ b/cassis/xmi.py @@ -10,7 +10,9 @@ from cassis.cas import Cas, IdGenerator, Sofa, View from cassis.typesystem import ( + _LIST_TYPES, _PRIMITIVE_ARRAY_TYPES, + _PRIMITIVE_LIST_TYPES, FEATURE_BASE_NAME_BEGIN, FEATURE_BASE_NAME_END, FEATURE_BASE_NAME_HEAD, @@ -26,6 +28,7 @@ TYPE_NAME_EMPTY_FLOAT_LIST, TYPE_NAME_EMPTY_FS_LIST, TYPE_NAME_EMPTY_INTEGER_LIST, + TYPE_NAME_EMPTY_STRING_LIST, TYPE_NAME_FLOAT, TYPE_NAME_FLOAT_ARRAY, TYPE_NAME_FLOAT_LIST, @@ -39,16 +42,17 @@ TYPE_NAME_NON_EMPTY_FLOAT_LIST, TYPE_NAME_NON_EMPTY_FS_LIST, TYPE_NAME_NON_EMPTY_INTEGER_LIST, + TYPE_NAME_NON_EMPTY_STRING_LIST, TYPE_NAME_SHORT, TYPE_NAME_SHORT_ARRAY, TYPE_NAME_SOFA, TYPE_NAME_STRING, TYPE_NAME_STRING_ARRAY, + TYPE_NAME_STRING_LIST, FeatureStructure, Type, TypeNotFoundError, - TypeSystem, TYPE_NAME_STRING_LIST, TYPE_NAME_EMPTY_STRING_LIST, TYPE_NAME_NON_EMPTY_STRING_LIST, - _PRIMITIVE_LIST_TYPES, _LIST_TYPES, + TypeSystem, ) NAN_VALUE = "NaN" @@ -244,7 +248,9 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b continue # Resolve references - if fs.type.name == TYPE_NAME_FS_ARRAY or feature.rangeType.name == TYPE_NAME_FS_ARRAY: + if fs.type.name == TYPE_NAME_FS_ARRAY or ( + feature.rangeType.name == TYPE_NAME_FS_ARRAY and not feature.multipleReferencesAllowed + ): # An array of references is a list of integers separated # by single spaces, e.g. targets = [] @@ -627,7 +633,9 @@ def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: Featur elem.attrib[feature_name] = " ".join(str(e.xmiID) for e in value.elements) elif feature.rangeType.name == TYPE_NAME_FS_LIST and not feature.multipleReferencesAllowed: if value is not None: # Compare to none to not skip if elements is empty! - elem.attrib[feature_name] = " ".join(str(e.xmiID) for e in self._collect_list_elements(feature.rangeType.name, value)) + elem.attrib[feature_name] = " ".join( + str(e.xmiID) for e in self._collect_list_elements(feature.rangeType.name, value) + ) elif feature_name == FEATURE_BASE_NAME_SOFA: elem.attrib[feature_name] = str(value.xmiID) elif feature.rangeType.name == TYPE_NAME_BOOLEAN: @@ -659,7 +667,7 @@ def _serialize_view(self, root: etree.Element, view: View): elem.attrib["sofa"] = str(view.sofa.xmiID) elem.attrib["members"] = " ".join(sorted((str(x.xmiID) for x in view.get_all_annotations()), key=int)) - def _collect_list_elements(self, type_name: str, value) -> List[str]: + def _collect_list_elements(self, type_name: str, value) -> List[str]: if type_name not in _LIST_TYPES: raise ValueError(f"Not a primitive list: {type_name}") diff --git a/tests/fixtures.py b/tests/fixtures.py index 8e9294c..45cf2aa 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -76,6 +76,20 @@ def cas_with_list_features_xmi(cas_with_list_features_path): return f.read() +# CAS with all kinds of array features + + +@pytest.fixture +def cas_with_array_features_path(): + return os.path.join(FIXTURE_DIR, "xmi", "cas_with_array_features.xmi") + + +@pytest.fixture +def cas_with_array_features_xmi(cas_with_array_features_path): + with open(cas_with_array_features_path, "r") as f: + return f.read() + + # CAS with references @@ -406,6 +420,20 @@ def typesystem_with_list_features_xml(typesystem_with_list_features_path): return f.read() +# Type system with all kinds of array features + + +@pytest.fixture +def typesystem_with_array_features_path(): + return os.path.join(FIXTURE_DIR, "typesystems", "typesystem_with_array_features.xml") + + +@pytest.fixture +def typesystem_with_array_features_xml(typesystem_with_array_features_path): + with open(typesystem_with_array_features_path, "r") as f: + return f.read() + + # Annotations diff --git a/tests/test_files/typesystems/typesystem_with_array_features.xml b/tests/test_files/typesystems/typesystem_with_array_features.xml new file mode 100644 index 0000000..9b89fba --- /dev/null +++ b/tests/test_files/typesystems/typesystem_with_array_features.xml @@ -0,0 +1,117 @@ + + + + + uima.tcas.DocumentAnnotation + + uima.tcas.Annotation + + + language + + uima.cas.String + + + + + FloatArrayHolder + + uima.cas.TOP + + + floatArray + + uima.cas.FloatArray + + + + + FloatArrayHolderMR + + uima.cas.TOP + + + floatArray + + uima.cas.FloatArray + true + + + + + FsArrayHolder + + uima.cas.TOP + + + fsArray + + uima.cas.FSArray + + + + + FsArrayHolderMR + + uima.cas.TOP + + + fsArray + + uima.cas.FSArray + true + + + + + IntArrayHolder + + uima.cas.TOP + + + intArray + + uima.cas.IntegerArray + + + + + IntArrayHolderMR + + uima.cas.TOP + + + intArray + + uima.cas.IntegerArray + true + + + + + StringArrayHolder + + uima.cas.TOP + + + stringArray + + uima.cas.StringArray + + + + + StringArrayHolderMR + + uima.cas.TOP + + + stringArray + + uima.cas.StringArray + true + + + + + diff --git a/tests/test_files/xmi/cas_with_array_features.xmi b/tests/test_files/xmi/cas_with_array_features.xmi new file mode 100644 index 0000000..3dc1fc1 --- /dev/null +++ b/tests/test_files/xmi/cas_with_array_features.xmi @@ -0,0 +1,26 @@ + + + + + + + + + + + blah + blub + + + + + + + + + + blah + blub + + + diff --git a/tests/test_xmi.py b/tests/test_xmi.py index ac290d5..81f9fc1 100644 --- a/tests/test_xmi.py +++ b/tests/test_xmi.py @@ -39,6 +39,10 @@ pytest.lazy_fixture("cas_with_list_features_xmi"), pytest.lazy_fixture("typesystem_with_list_features_xml"), ), + ( + pytest.lazy_fixture("cas_with_array_features_xmi"), + pytest.lazy_fixture("typesystem_with_array_features_xml"), + ), ]