Skip to content

Commit

Permalink
#217 - Use types in selects
Browse files Browse the repository at this point in the history
- Accept type name or type in selects
- Updated tests
  • Loading branch information
reckart committed Feb 7, 2023
1 parent 0cf46f2 commit 95a9d4c
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 55 deletions.
32 changes: 17 additions & 15 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
TYPE_NAME_FS_LIST,
TYPE_NAME_SOFA,
FeatureStructure,
Type,
TypeCheckError,
TypeSystem,
TypeSystemMode,
Expand Down Expand Up @@ -387,19 +388,20 @@ def get_covered_text(self, annotation: FeatureStructure) -> str:
sofa = self.get_sofa()
return sofa.sofaString[annotation.begin : annotation.end]

def select(self, type_name: str) -> List[FeatureStructure]:
def select(self, type_: Union[Type, str]) -> List[FeatureStructure]:
"""Finds all annotations of type `type_name`.
Args:
type_name: The name of the type whose annotation instances are to be found
type_: The type, or the name of the type, whose annotation instances are to be found
Returns:
A list of all feature structures of type `type_name`
"""
return self._get_feature_structures(type_name)
t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
return self._get_feature_structures(t)

def select_covered(self, type_name: str, covering_annotation: FeatureStructure) -> List[FeatureStructure]:
def select_covered(self, type_: Union[Type, str], covering_annotation: FeatureStructure) -> List[FeatureStructure]:
"""Returns a list of covered annotations.
Return all annotations that are covered
Expand All @@ -408,23 +410,24 @@ def select_covered(self, type_name: str, covering_annotation: FeatureStructure)
are ignored.
Args:
type_name: The type name of the annotations to be returned
type_: The type, or the name of the type, whose annotation instances are to be found
covering_annotation: The name of the annotation which covers
Returns:
A list of covered annotations
"""
t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
c_begin = covering_annotation.begin
c_end = covering_annotation.end

result = []
for annotation in self._get_feature_structures_in_range(type_name, c_begin, c_end):
for annotation in self._get_feature_structures_in_range(t, c_begin, c_end):
if annotation.begin >= c_begin and annotation.end <= c_end:
result.append(annotation)
return result

def select_covering(self, type_name: str, covered_annotation: FeatureStructure) -> List[FeatureStructure]:
def select_covering(self, type_: Union[Type, str], covered_annotation: FeatureStructure) -> List[FeatureStructure]:
"""Returns a list of annotations that cover the given annotation.
Return all annotations that are covering. This can potentially be slow.
Expand All @@ -433,19 +436,20 @@ def select_covering(self, type_name: str, covered_annotation: FeatureStructure)
are ignored.
Args:
type_name: The type name of the annotations to be returned
type_: The type, or the name of the type, whose annotation instances are to be found
covered_annotation: The name of the annotation which is covered
Returns:
A list of covering annotations
"""
t = type_ if isinstance(type_, Type) else self.typesystem.get_type(type_)
c_begin = covered_annotation.begin
c_end = covered_annotation.end

# We iterate over all annotations and check whether the provided annotation
# is covered in the current annotation
for annotation in self._get_feature_structures(type_name):
for annotation in self._get_feature_structures(t):
if c_begin >= annotation.begin and c_end <= annotation.end:
yield annotation

Expand All @@ -460,24 +464,22 @@ def select_all(self) -> List[FeatureStructure]:

# FS handling

def _get_feature_structures(self, type_name) -> List[FeatureStructure]:
def _get_feature_structures(self, type_: Type) -> List[FeatureStructure]:
"""Returns a list of all feature structures of type `type_name` and child types."""
t = self._typesystem.get_type(type_name)
types = {c.name for c in t.descendants}
types = {c.name for c in type_.descendants}

result = []
for name in types:
result.extend(self._current_view.type_index[name])

return result

def _get_feature_structures_in_range(self, type_name: str, begin: int, end: int) -> List[FeatureStructure]:
def _get_feature_structures_in_range(self, type_: Type, begin: int, end: int) -> List[FeatureStructure]:
"""Returns a list of all feature structures of type `type_name` and child types.
Only features are returned that are in [begin, end] or close to it. If you use this function,
you should always check bound in the calling method.
"""
t = self._typesystem.get_type(type_name)
types = {c.name for c in t.descendants}
types = {c.name for c in type_.descendants}

result = []
for name in types:
Expand Down
37 changes: 19 additions & 18 deletions tests/test_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ def test_sofa_uri_can_be_set_and_read():


def test_select(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
ts = load_typesystem(small_typesystem_xml)
cas = Cas(typesystem=ts)
cas.add_all(tokens + sentences)

actual_tokens = list(cas.select("cassis.Token"))
actual_sentences = list(cas.select("cassis.Sentence"))

assert actual_tokens == tokens
assert actual_sentences == sentences
assert list(cas.select("cassis.Token")) == tokens
assert list(cas.select("cassis.Sentence")) == sentences
assert list(cas.select(ts.get_type("cassis.Token"))) == tokens
assert list(cas.select(ts.get_type("cassis.Sentence"))) == sentences


def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
Expand All @@ -138,21 +138,22 @@ def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sent


def test_select_covered(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
ts = load_typesystem(small_typesystem_xml)
cas = Cas(typesystem=ts)
cas.add_all(tokens + sentences)
first_sentence, second_sentence = sentences
tokens_in_first_sentence = tokens[:6]
tokens_in_second_sentence = tokens[6:]

actual_tokens_in_first_sentence = list(cas.select_covered("cassis.Token", first_sentence))
actual_tokens_in_second_sentence = list(cas.select_covered("cassis.Token", second_sentence))

assert actual_tokens_in_first_sentence == tokens_in_first_sentence
assert actual_tokens_in_second_sentence == tokens_in_second_sentence
assert list(cas.select_covered("cassis.Token", first_sentence)) == tokens_in_first_sentence
assert list(cas.select_covered("cassis.Token", second_sentence)) == tokens_in_second_sentence
assert list(cas.select_covered(ts.get_type("cassis.Token"), first_sentence)) == tokens_in_first_sentence
assert list(cas.select_covered(ts.get_type("cassis.Token"), second_sentence)) == tokens_in_second_sentence


def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
ts = load_typesystem(small_typesystem_xml)
cas = Cas(typesystem=ts)

AnnotationType = cas.typesystem.create_type("test.Annotation")
SentenceType = cas.typesystem.get_type("cassis.Sentence")
Expand All @@ -162,9 +163,8 @@ def test_select_covered_overlapping(small_typesystem_xml, tokens, sentences):
cas.add(sentence)
cas.add_all(annotations)

actual_annotations = list(cas.select_covered("test.Annotation", sentence))

assert actual_annotations == annotations
assert list(cas.select_covered("test.Annotation", sentence)) == annotations
assert list(cas.select_covered(ts.get_type("test.Annotation"), sentence)) == annotations


def test_select_covered_also_returns_parent_instances(small_typesystem_xml, tokens, sentences):
Expand Down Expand Up @@ -192,7 +192,8 @@ def test_select_covered_also_returns_parent_instances(small_typesystem_xml, toke


def test_select_covering(small_typesystem_xml, tokens, sentences):
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
ts = load_typesystem(small_typesystem_xml)
cas = Cas(typesystem=ts)
cas.add_all(tokens + sentences)
actual_first_sentence, actual_second_sentence = sentences
tokens_in_first_sentence = tokens[:6]
Expand All @@ -206,7 +207,7 @@ def test_select_covering(small_typesystem_xml, tokens, sentences):
assert actual_first_sentence == first_sentence

for token in tokens_in_second_sentence:
result = list(cas.select_covering("cassis.Sentence", token))
result = list(cas.select_covering(ts.get_type("cassis.Sentence"), token))
second_sentence = result[0]

assert len(result) == 1
Expand Down
17 changes: 9 additions & 8 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,21 +153,22 @@ def test_unicode(json_path, annotations):
actual_utf8_bytes = bytes(actual_covered_text, "UTF-8")
assert actual_utf8_bytes == expected_utf8_bytes


def test_recursive_type_system():
typesystem = TypeSystem()
type_a = typesystem.create_type(name='example.TypeA')
type_b = typesystem.create_type(name='example.TypeB')
typesystem.create_feature(domainType=type_a, name='typeB', rangeType=type_b)
typesystem.create_feature(domainType=type_b, name='typeA', rangeType=type_a)
type_a = typesystem.create_type(name="example.TypeA")
type_b = typesystem.create_type(name="example.TypeB")
typesystem.create_feature(domainType=type_a, name="typeB", rangeType=type_b)
typesystem.create_feature(domainType=type_b, name="typeA", rangeType=type_a)

source_cas = Cas(typesystem=typesystem)
target_cas = load_cas_from_json(source_cas.to_json(type_system_mode=TypeSystemMode.FULL))

target_type_a = target_cas.typesystem.get_type('example.TypeA')
target_type_b = target_cas.typesystem.get_type('example.TypeB')
target_type_a = target_cas.typesystem.get_type("example.TypeA")
target_type_b = target_cas.typesystem.get_type("example.TypeB")

# We have to compare types by name below due to https://github.com/dkpro/dkpro-cassis/issues/270
assert target_type_a is not None
assert target_type_a.get_feature('typeB').rangeType.name == target_type_b.name
assert target_type_a.get_feature("typeB").rangeType.name == target_type_b.name
assert target_type_b is not None
assert target_type_b.get_feature('typeA').rangeType.name == target_type_a.name
assert target_type_b.get_feature("typeA").rangeType.name == target_type_a.name
30 changes: 16 additions & 14 deletions tests/test_typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,37 +733,39 @@ def test_that_merging_types_with_different_incompatible_supertypes_throws():
with pytest.raises(ValueError, match=r".*incompatible super types.*"):
merge_typesystems(ts1, ts2)


def test_that_merging_types_creates_self_contained_type_system():
ts1 = TypeSystem()
type_a = ts1.create_type(name='example.TypeA')
type_b = ts1.create_type(name='example.TypeB')
type_c = ts1.create_type(name='example.TypeC', supertypeName='example.TypeA')
ts1.create_feature(domainType=type_a, name='typeB', rangeType=type_b)
ts1.create_feature(domainType=type_b, name='typeA', rangeType=type_a)
type_a = ts1.create_type(name="example.TypeA")
type_b = ts1.create_type(name="example.TypeB")
type_c = ts1.create_type(name="example.TypeC", supertypeName="example.TypeA")
ts1.create_feature(domainType=type_a, name="typeB", rangeType=type_b)
ts1.create_feature(domainType=type_b, name="typeA", rangeType=type_a)

ts2 = TypeSystem()

ts_merged = merge_typesystems(ts1, ts2)

merged_type_a = ts_merged.get_type('example.TypeA')
merged_type_b = ts_merged.get_type('example.TypeB')
merged_type_c = ts_merged.get_type('example.TypeC')
merged_type_a = ts_merged.get_type("example.TypeA")
merged_type_b = ts_merged.get_type("example.TypeB")
merged_type_c = ts_merged.get_type("example.TypeC")

assert merged_type_a is not None
assert merged_type_a.get_feature('typeB') is not type_a.get_feature('typeB')
assert merged_type_a.get_feature('typeB').rangeType is merged_type_b
assert merged_type_a.get_feature('typeB').rangeType is not type_b
assert merged_type_a.get_feature("typeB") is not type_a.get_feature("typeB")
assert merged_type_a.get_feature("typeB").rangeType is merged_type_b
assert merged_type_a.get_feature("typeB").rangeType is not type_b
assert merged_type_b is not None
assert merged_type_b.get_feature('typeA') is not type_b.get_feature('typeA')
assert merged_type_b.get_feature('typeA').rangeType is merged_type_a
assert merged_type_b.get_feature('typeA').rangeType is not type_a
assert merged_type_b.get_feature("typeA") is not type_b.get_feature("typeA")
assert merged_type_b.get_feature("typeA").rangeType is merged_type_a
assert merged_type_b.get_feature("typeA").rangeType is not type_a
assert merged_type_c is not None
assert merged_type_c.supertype is merged_type_a
assert merged_type_c.supertype is not type_c


# DKPro Core Support


def test_that_dkpro_core_typeystem_can_be_loaded():
ts = load_dkpro_core_typesystem()

Expand Down

0 comments on commit 95a9d4c

Please sign in to comment.