Skip to content

Commit

Permalink
Merge branch 'main' into feature/168-Experimental-JSON-CAS-support
Browse files Browse the repository at this point in the history
* main:
  #238 - Error parsing FSList in CTAKES XMi
  #238 - Error parsing FSList in CTAKES XMi
  #238 - Error parsing FSList in CTAKES XMi
  Create CITATION.cff
  #236 - Long output when printing type (#237)

% Conflicts:
%	cassis/cas.py
  • Loading branch information
reckart committed Dec 12, 2021
2 parents 9a0d17c + 0b802b3 commit 0566e37
Show file tree
Hide file tree
Showing 9 changed files with 411 additions and 20 deletions.
14 changes: 14 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Klie"
given-names: "Jan-Christoph"
orcid: "https://orcid.org/0000-0003-0181-6450"
- family-names: "Eckart de Castilho"
given-names: "Richard"
orcid: "https://orcid.org/0000-0003-0991-7045"
title: "dkpro-cassis"
version: 0.6.1
doi: 10.5281/zenodo.5537447
date-released: 2021-11-23
url: "https://github.com/dkpro/dkpro-cassis"
23 changes: 16 additions & 7 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from attr import validators
from sortedcontainers import SortedKeyList

from cassis.typesystem import TYPE_NAME_SOFA, FeatureStructure, TypeCheckError, TypeSystem, TypeSystemMode
from cassis.typesystem import TYPE_NAME_SOFA, FeatureStructure, TypeCheckError, TypeSystem, TYPE_NAME_FS_LIST, \
TYPE_NAME_FS_ARRAY, FEATURE_BASE_NAME_HEAD, TypeSystemMode

_validator_optional_string = validators.optional(validators.instance_of(str))

Expand Down Expand Up @@ -651,7 +652,7 @@ def typecheck(self) -> List[TypeCheckError]:
def _find_all_fs(
self,
generate_missing_ids: bool = True,
include_inlinable_arrays: bool = False,
include_inlinable_arrays_and_lists: bool = False,
seeds: Iterable = None,
) -> Iterable[FeatureStructure]:
"""This function traverses the whole CAS in order to find all directly and indirectly referenced
Expand Down Expand Up @@ -717,21 +718,29 @@ def _find_all_fs(
continue

if (
not include_inlinable_arrays
not include_inlinable_arrays_and_lists
and not feature.multipleReferencesAllowed
and ts.is_array(feature.rangeType)
and (ts.is_array(feature.rangeType) or ts.is_list(feature.rangeType))
):
# For inlined FSArrays, we still need to scan their members
if feature.rangeType.name == "uima.cas.FSArray" and feature_value.elements:
# For inlined FSArrays / FSList, we still need to scan their members
if feature.rangeType.name == TYPE_NAME_FS_ARRAY and feature_value.elements:
for ref in feature_value.elements:
if not ref or ref.xmiID in all_fs:
continue
openlist.append(ref)
elif feature.rangeType.name == TYPE_NAME_FS_LIST and hasattr(feature_value, FEATURE_BASE_NAME_HEAD):
v = feature_value
while hasattr(v, FEATURE_BASE_NAME_HEAD):
if not v.head or v.head.xmiID in all_fs:
continue
openlist.append(v.head)
v = v.tail
# For primitive arrays / lists, we do not need to handle the elements
continue

if not hasattr(feature_value, "xmiID"):
raise AttributeError(
f"Feature [{feature_name}] should point to a [{feature.rangeType.name}] but the feature value is a [{type(feature_value)}] with the value [{feature_value}]"
f"Feature [{feature.domainType.name}:{feature_name}] should point to a [{feature.rangeType.name}] but the feature value is a [{type(feature_value)}] with the value [{feature_value}]"
)

if feature_value.xmiID in all_fs:
Expand Down
2 changes: 1 addition & 1 deletion cassis/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def serialize(

# Find all fs, even the ones that are not directly added to a sofa
used_types = set()
for fs in sorted(cas._find_all_fs(include_inlinable_arrays=True), key=lambda a: a.xmiID):
for fs in sorted(cas._find_all_fs(include_inlinable_arrays_and_lists=True), key=lambda a: a.xmiID):
used_types.add(fs.type)
json_fs = self._serialize_feature_structure(fs)
feature_structures.append(json_fs)
Expand Down
76 changes: 75 additions & 1 deletion cassis/typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,21 @@
TYPE_NAME_DOUBLE = UIMA_CAS_PREFIX + "Double"
TYPE_NAME_ARRAY_BASE = UIMA_CAS_PREFIX + "ArrayBase"
TYPE_NAME_FS_ARRAY = UIMA_CAS_PREFIX + "FSArray"
TYPE_NAME_FS_LIST = UIMA_CAS_PREFIX + "FSList"
TYPE_NAME_EMPTY_FS_LIST = UIMA_CAS_PREFIX + "EmptyFSList"
TYPE_NAME_NON_EMPTY_FS_LIST = UIMA_CAS_PREFIX + "NonEmptyFSList"
TYPE_NAME_INTEGER_ARRAY = UIMA_CAS_PREFIX + "IntegerArray"
TYPE_NAME_INTEGER_LIST = UIMA_CAS_PREFIX + "IntegerList"
TYPE_NAME_EMPTY_INTEGER_LIST = UIMA_CAS_PREFIX + "EmptyIntegerList"
TYPE_NAME_NON_EMPTY_INTEGER_LIST = UIMA_CAS_PREFIX + "NonEmptyIntegerList"
TYPE_NAME_FLOAT_ARRAY = UIMA_CAS_PREFIX + "FloatArray"
TYPE_NAME_FLOAT_LIST = UIMA_CAS_PREFIX + "FloatList"
TYPE_NAME_EMPTY_FLOAT_LIST = UIMA_CAS_PREFIX + "EmptyFloatList"
TYPE_NAME_NON_EMPTY_FLOAT_LIST = UIMA_CAS_PREFIX + "NonEmptyFloatList"
TYPE_NAME_STRING_ARRAY = UIMA_CAS_PREFIX + "StringArray"
TYPE_NAME_STRING_LIST = UIMA_CAS_PREFIX + "StringList"
TYPE_NAME_EMPTY_STRING_LIST = UIMA_CAS_PREFIX + "EmptyStringList"
TYPE_NAME_NON_EMPTY_STRING_LIST = UIMA_CAS_PREFIX + "NonEmptyStringList"
TYPE_NAME_BOOLEAN_ARRAY = UIMA_CAS_PREFIX + "BooleanArray"
TYPE_NAME_BYTE_ARRAY = UIMA_CAS_PREFIX + "ByteArray"
TYPE_NAME_SHORT_ARRAY = UIMA_CAS_PREFIX + "ShortArray"
Expand All @@ -57,6 +69,8 @@
FEATURE_BASE_NAME_BEGIN = "begin"
FEATURE_BASE_NAME_END = "end"
FEATURE_BASE_NAME_LANGUAGE = "language"
FEATURE_BASE_NAME_HEAD = "head"
FEATURE_BASE_NAME_TAIL = "tail"

_DOCUMENT_ANNOTATION_TYPE = "uima.tcas.DocumentAnnotation"

Expand Down Expand Up @@ -167,10 +181,13 @@
"uima.cas.StringArray",
}

_PRIMITIVE_LIST_TYPES = {TYPE_NAME_INTEGER_LIST, TYPE_NAME_FLOAT_LIST, TYPE_NAME_STRING_LIST}

_INHERITANCE_FINAL_TYPES = _PRIMITIVE_ARRAY_TYPES

_ARRAY_TYPES = _PRIMITIVE_ARRAY_TYPES | {"uima.cas.FSArray"}
_ARRAY_TYPES = _PRIMITIVE_ARRAY_TYPES | {TYPE_NAME_FS_ARRAY}

_LIST_TYPES = _PRIMITIVE_LIST_TYPES | {TYPE_NAME_FS_LIST}

class TypeSystemMode(Enum):
"""How much type system information to include."""
Expand Down Expand Up @@ -254,6 +271,23 @@ def is_primitive_array(type_: Union[str, "Type"]) -> bool:
return type_name in _PRIMITIVE_ARRAY_TYPES


def is_primitive_list(type_: Union[str, "Type"]) -> bool:
    """Checks if the type identified by `type_` is a primitive list, i.e. a list of primitives.

    Args:
        type_: Type to query for (`Type` or name as string)

    Returns:
        Returns `True` if the name of the type is one of the primitive list type names
        in `_PRIMITIVE_LIST_TYPES`, else `False`. The top type is never a primitive list.
    """
    name = type_.name if not isinstance(type_, str) else type_

    # Only the exact name is compared; the inheritance hierarchy is not consulted.
    # NOTE(review): names that are not in `_PRIMITIVE_LIST_TYPES` (e.g. the
    # Empty*/NonEmpty* list type names) yield `False` - confirm this is intended.
    return False if name == TOP_TYPE_NAME else name in _PRIMITIVE_LIST_TYPES


def is_array(type_: Union[str, "Type"]) -> bool:
"""Checks if the type identified by `type` is an array.
Expand All @@ -271,6 +305,23 @@ def is_array(type_: Union[str, "Type"]) -> bool:
return type_name in _ARRAY_TYPES


def is_list(type_: Union[str, "Type"]) -> bool:
    """Checks if the type identified by `type_` is a list.

    Args:
        type_: Type to query for (`Type` or name as string)

    Returns:
        Returns `True` if the name of the type is one of the list type names in
        `_LIST_TYPES`, else `False`. The top type is never a list.
    """
    name = type_.name if not isinstance(type_, str) else type_

    # Only the exact name is compared; the inheritance hierarchy is not consulted.
    # NOTE(review): names that are not in `_LIST_TYPES` (e.g. the Empty*/NonEmpty*
    # list type names) yield `False` - confirm this is intended.
    return False if name == TOP_TYPE_NAME else name in _LIST_TYPES


@attr.s
class TypeCheckError(Exception):
xmiID: int = attr.ib() # xmiID of the feature structure with type error
Expand Down Expand Up @@ -598,6 +649,9 @@ def __hash__(self):
def __eq__(self, other):
    """Compare by name.

    Args:
        other: Object to compare with.

    Returns:
        `True`/`False` depending on whether both names are equal, or `NotImplemented`
        if `other` has no `name` attribute, so that comparing with unrelated objects
        (e.g. `None`) evaluates to `False` instead of raising `AttributeError`.
    """
    if not hasattr(other, "name"):
        return NotImplemented
    return self.name == other.name

def __str__(self):
    """Return a short string representation that contains only the name."""
    return f"Type(name={self.name})"


class TypeSystem:
def __init__(self, add_document_annotation_type: bool = True):
Expand Down Expand Up @@ -817,6 +871,16 @@ def is_primitive_array(self, type_: Union[str, Type]) -> bool:
"""
return is_primitive_array(type_)

def is_primitive_list(self, type_: Union[str, Type]) -> bool:
    """Checks if the type identified by `type_` is a primitive list, i.e. a list of primitives.

    Delegates to the module-level `is_primitive_list` function.

    Args:
        type_: Type to query for (`Type` or name as string)

    Returns:
        Returns `True` if the type identified by `type_` is a primitive list type, else `False`
    """
    return is_primitive_list(type_)

def is_array(self, type_: Union[str, Type]) -> bool:
"""Checks if the type identified by `type` is an array.
Expand All @@ -827,6 +891,16 @@ def is_array(self, type_: Union[str, Type]) -> bool:
"""
return is_array(type_)

def is_list(self, type_: Union[str, Type]) -> bool:
    """Checks if the type identified by `type_` is a list.

    Delegates to the module-level `is_list` function.

    Args:
        type_: Type to query for (`Type` or name as string)

    Returns:
        Returns `True` if the type identified by `type_` is a list type, else `False`
    """
    return is_list(type_)

def subsumes(self, parent: Union[str, Type], child: Union[str, Type]) -> bool:
"""Determines if the type `child` is a child of `parent`.
Expand Down
Loading

0 comments on commit 0566e37

Please sign in to comment.