Skip to content

Commit

Permalink
#250 - Convenience for setting the document language
Browse files Browse the repository at this point in the history
- Added get_document_annotation()
- Added document_language property
- Added test
  • Loading branch information
reckart committed Feb 4, 2024
1 parent ef1d5f4 commit 8260333
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
35 changes: 35 additions & 0 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def __init__(
lenient: bool = False,
sofa_string: str = None,
sofa_mime: str = None,
document_language: str = None,
):
"""Creates a CAS with the specified typesystem. If no typesystem is given, then the default one
is used which only contains UIMA-predefined types.
Expand Down Expand Up @@ -241,6 +242,9 @@ def __init__(
else:
self.sofa_mime = "text/plain"

if document_language is not None:
self.document_language = document_language

@property
def typesystem(self) -> TypeSystem:
return self._typesystem
Expand Down Expand Up @@ -512,6 +516,19 @@ def get_sofa(self) -> Sofa:
"""
return self._current_view.sofa

def get_document_annotation(self) -> FeatureStructure:
    """Return the DocumentAnnotation of this CAS view, creating one if none exists.

    Returns:
        The first feature structure selected for ``TYPE_NAME_DOCUMENT_ANNOTATION``,
        or a newly instantiated one that has just been added to the CAS.
    """
    try:
        found = self.select(TYPE_NAME_DOCUMENT_ANNOTATION)[0]
    except IndexError:
        # Nothing was selected: instantiate the type from the typesystem and add it.
        annotation_type = self.typesystem.get_type(TYPE_NAME_DOCUMENT_ANNOTATION)
        created = annotation_type()
        self.add(created)
        return created
    return found

@property
def sofas(self) -> List[Sofa]:
"""Finds all sofas that this CAS manages
Expand Down Expand Up @@ -598,6 +615,24 @@ def sofa_array(self, value):
"""
self.get_sofa().sofaArray = value

@property
def document_language(self) -> str:
    """Language code stored on the document annotation.

    Reading this property goes through ``get_document_annotation()``, so a
    document annotation is created if the view does not have one yet.

    Returns:
        The value of the language feature of the document annotation.
    """
    document_annotation = self.get_document_annotation()
    return document_annotation.get(FEATURE_BASE_NAME_LANGUAGE)

@document_language.setter
def document_language(self, value: str) -> None:
    """Set the document language.

    The value is written to the language feature of the document annotation
    obtained from ``get_document_annotation()``.

    Args:
        value: The language code to store as the document language.
    """
    # A property setter returns nothing, hence the ``None`` return annotation.
    self.get_document_annotation().set(FEATURE_BASE_NAME_LANGUAGE, value)

def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False) -> Optional[str]:
"""Creates a XMI representation of this CAS.
Expand Down
8 changes: 8 additions & 0 deletions tests/test_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ def test_sofa_string_and_mime_type_can_be_set_using_constructor():
assert cas.sofa_mime == "text/html"


def test_document_language_can_be_set_using_constructor():
    """The ``document_language`` constructor argument is readable back from the CAS."""
    text = "Ich bin ein test!"
    cas = Cas(sofa_string=text, document_language="de")

    # Sofa text and the default mime type are checked before the language is read.
    assert cas.sofa_string == text
    assert cas.sofa_mime == "text/plain"
    assert cas.document_language == "de"


# Select


Expand Down

0 comments on commit 8260333

Please sign in to comment.