Skip to content

Commit

Permalink
Merge branch 'main' into feature/168-Experimental-JSON-CAS-support
Browse files — browse the repository at this point in the history
* main:
  #174 - FSes that are only transitively referenced cannot be serialized (#179)
  #174 - FSes that are only transitively referenced cannot be serialized (#179)
  • Loading branch information
reckart committed Aug 18, 2021
2 parents 0a6be49 + 50fcd66 commit 95bf09a
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 7 deletions.
25 changes: 20 additions & 5 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ def typecheck(self) -> List[TypeCheckError]:

return all_errors

def _find_all_fs(self, generate_missing_ids: bool = False) -> Iterable[FeatureStructure]:
def _find_all_fs(self, generate_missing_ids: bool = True) -> Iterable[FeatureStructure]:
"""This function traverses the whole CAS in order to find all directly and indirectly referenced
feature structures. Traversing is needed as it can be that a feature structure is not added to the sofa but
referenced by another feature structure as a feature."""
Expand All @@ -645,8 +645,25 @@ def _find_all_fs(self, generate_missing_ids: bool = False) -> Iterable[FeatureSt
ts = self.typesystem
while openlist:
fs = openlist.pop(0)
if generate_missing_ids and fs.xmiID is None:
fs.xmiID = self._get_next_xmi_id()

# We do not want to return cas:NULL here as we handle serializing it later
if fs.xmiID == 0:
continue

if fs.xmiID is None:
if generate_missing_ids:
fs.xmiID = self._get_next_xmi_id()
else:
raise ValueError("FS has no ID and ID generation is disabled! {fs}".format(fs=fs))

existing_fs = all_fs.get(fs.xmiID)
if existing_fs is not None and existing_fs is not fs:
raise ValueError(
"Duplicate FS id [{fsId}] used for [{fs1}] and [{fs2}]".format(
fsId=fs.xmiID, fs1=existing_fs, fs2=fs
)
)

all_fs[fs.xmiID] = fs

t = ts.get_type(fs.type)
Expand Down Expand Up @@ -678,8 +695,6 @@ def _find_all_fs(self, generate_missing_ids: bool = False) -> Iterable[FeatureSt
if referenced_fs.xmiID not in all_fs:
openlist.append(referenced_fs)

# We do not want to return cas:NULL here as we handle serializing it later
all_fs.pop(0, None)
yield from all_fs.values()

def _get_next_xmi_id(self) -> int:
Expand Down
2 changes: 1 addition & 1 deletion cassis/xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
self._serialize_cas_null(root)

# Find all fs, even the ones that are not directly added to a sofa
for fs in sorted(cas._find_all_fs(generate_missing_ids=True), key=lambda a: a.xmiID):
for fs in sorted(cas._find_all_fs(), key=lambda a: a.xmiID):
self._serialize_feature_structure(cas, root, fs)

for sofa in cas.sofas:
Expand Down
11 changes: 11 additions & 0 deletions tests/test_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,14 @@ def test_removing_throws_if_fs_in_other_view(small_typesystem_xml, tokens, sente

with pytest.raises(ValueError):
view.remove(tokens[0])


def test_fail_on_duplicate_fs_id(small_typesystem_xml):
    """Collecting all feature structures must raise ValueError when two distinct ones share an xmiID."""
    typesystem = load_typesystem(small_typesystem_xml)
    cas = Cas(typesystem=typesystem)
    token_type = cas.typesystem.get_type("cassis.Token")

    # Two different tokens are deliberately given the same explicit id.
    for offset in (0, 10):
        cas.add_annotation(token_type(xmiID=10, begin=offset, end=offset))

    with pytest.raises(ValueError):
        # _find_all_fs yields its results, so it has to be consumed for the check to run.
        list(cas._find_all_fs())
2 changes: 1 addition & 1 deletion tests/test_xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def test_serializing_xmi_ignores_none_features(small_xmi, small_typesystem_xml):
typesystem = load_typesystem(small_typesystem_xml)
cas = load_cas_from_xmi(small_xmi, typesystem=typesystem)
TokenType = typesystem.get_type("cassis.Token")
cas.add(TokenType(xmiID=13, sofa=1, begin=0, end=3, id=None, pos=None))
cas.add(TokenType(begin=0, end=3, id=None, pos=None))

actual_xml = cas.to_xmi()

Expand Down

0 comments on commit 95bf09a

Please sign in to comment.