From f5d396c098319a73adce3f0a117bd046f0be0169 Mon Sep 17 00:00:00 2001 From: Jan-Christoph Klie Date: Sat, 14 Aug 2021 18:03:40 +0200 Subject: [PATCH] #170 - Handling of the "uima.noNamespace" prefix (#178) - Move no namespace handling to xmi - Add better tests for this --- cassis/typesystem.py | 17 ++-------- cassis/xmi.py | 13 ++++++-- tests/fixtures.py | 14 ++++++++ ...typesystem_has_types_with_no_namespace.xml | 33 +++---------------- .../xmi/cas_has_fs_with_no_namespace.xmi | 7 ++++ tests/test_xmi.py | 4 +++ 6 files changed, 42 insertions(+), 46 deletions(-) create mode 100644 tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi diff --git a/cassis/typesystem.py b/cassis/typesystem.py index 7ba954e..7623aea 100644 --- a/cassis/typesystem.py +++ b/cassis/typesystem.py @@ -782,12 +782,6 @@ def deserialize(self, source: Union[IO, str]) -> TypeSystem: description = self._get_elem_as_str(elem.find("{*}description")) supertypeName = self._get_elem_as_str(elem.find("{*}supertypeName")) - if "." not in type_name: - type_name = "uima.noNamespace." + type_name - - if "." not in supertypeName: - supertypeName = "uima.noNamespace." 
+ supertypeName - types[type_name] = Type(name=type_name, supertypeName=supertypeName, description=description) type_dependencies[type_name].add(supertypeName) @@ -923,20 +917,13 @@ def _serialize_type(self, xf: IO, type_: Type): typeDescription = etree.Element("typeDescription") name = etree.SubElement(typeDescription, "name") - type_name = type_.name - if type_name.startswith("uima.noNamespace."): - type_name = type_name.replace("uima.noNamespace.", "") - - name.text = type_name + name.text = type_.name description = etree.SubElement(typeDescription, "description") description.text = type_.description supertype_name_node = etree.SubElement(typeDescription, "supertypeName") - supertype_name = type_.supertypeName - if supertype_name.startswith("uima.noNamespace."): - supertype_name = supertype_name.replace("uima.noNamespace.", "") - supertype_name_node.text = supertype_name + supertype_name_node.text = type_.supertypeName # Only create the `feature` element if there is at least one feature feature_list = list(type_.features) diff --git a/cassis/xmi.py b/cassis/xmi.py index 8656c9d..6571473 100644 --- a/cassis/xmi.py +++ b/cassis/xmi.py @@ -262,9 +262,12 @@ def _parse_view(self, elem) -> ProtoView: def _parse_feature_structure(self, typesystem: TypeSystem, elem, children: Dict[str, List[str]]): # Strip the http prefix, replace / with ., remove the ecore part # TODO: Error checking - typename = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip() + type_name: str = elem.tag[9:].replace("/", ".").replace("ecore}", "").strip() - AnnotationType = typesystem.get_type(typename) + if type_name.startswith("uima.noNamespace."): + type_name = type_name[17:] + + AnnotationType = typesystem.get_type(type_name) attributes = dict(elem.attrib) attributes.update(children) @@ -363,8 +366,12 @@ def _serialize_cas_null(self, root: etree.Element): def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: FeatureStructure): ts = cas.typesystem + type_name = 
fs.type + if "." not in type_name: + type_name = f"uima.noNamespace.{type_name}" + # The type name is a Java package, e.g. `org.myproj.Foo`. - parts = fs.type.split(".") + parts = type_name.split(".") # The CAS type namespace is converted to an XML namespace URI by the following rule: # replace all dots with slashes, prepend http:///, and append .ecore. diff --git a/tests/fixtures.py b/tests/fixtures.py index e13ed24..5bd296e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -160,6 +160,20 @@ def cas_with_leniency_xmi(cas_with_leniency_path): return f.read() +# CAS with feature structures whose types have no namespace + + +@pytest.fixture +def cas_has_fs_with_no_namespace_path(): + return os.path.join(FIXTURE_DIR, "xmi", "cas_has_fs_with_no_namespace.xmi") + + +@pytest.fixture +def cas_has_fs_with_no_namespace_xmi(cas_has_fs_with_no_namespace_path): + with open(cas_has_fs_with_no_namespace_path, "r") as f: + return f.read() + + # Small type system diff --git a/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml b/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml index 6805a8d..245aa76 100644 --- a/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml +++ b/tests/test_files/typesystems/typesystem_has_types_with_no_namespace.xml @@ -2,41 +2,18 @@ - ArtifactID + TypeWithNoNameSpace A unique artifact identifier. - uima.cas.TOP + uima.tcas.Annotation - artifactID - A unique identification string for the artifact. This should be the file name for files, - or the unique identifier used in a database if the document source is a database - collection reader. + testFeature + I am the testFeatureDescription uima.cas.String - - ArtifactMetadata - A piece of metadata about the artifact in the form of a key value pair. 
- uima.cas.TOP - - - key - - uima.cas.String - - - value - - uima.cas.String - - - - - ChildOfArtifactID - ArtifactID - - + diff --git a/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi b/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi new file mode 100644 index 0000000..ed9748b --- /dev/null +++ b/tests/test_files/xmi/cas_has_fs_with_no_namespace.xmi @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tests/test_xmi.py b/tests/test_xmi.py index 1602020..064d6d3 100644 --- a/tests/test_xmi.py +++ b/tests/test_xmi.py @@ -19,6 +19,10 @@ (pytest.lazy_fixture("cas_with_reserved_names_xmi"), pytest.lazy_fixture("typesystem_with_reserved_names_xml")), (pytest.lazy_fixture("cas_with_two_sofas_xmi"), pytest.lazy_fixture("small_typesystem_xml")), (pytest.lazy_fixture("cas_with_smileys_xmi"), pytest.lazy_fixture("dkpro_typesystem_xml")), + ( + pytest.lazy_fixture("cas_has_fs_with_no_namespace_xmi"), + pytest.lazy_fixture("typesystem_has_types_with_no_namespace_xml"), + ), ]