From d85f8ff911a2696639d92a210e984c71b2359cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Baumg=C3=A4rtner?= Date: Wed, 3 May 2023 13:41:41 +0200 Subject: [PATCH 1/2] add lenient parameter to load from json --- cassis/json.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cassis/json.py b/cassis/json.py index d2dc270..3fa8885 100644 --- a/cassis/json.py +++ b/cassis/json.py @@ -37,7 +37,7 @@ NEGATIVE_INFINITE_VALUE_ABBR = "-Inf" -def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None) -> Cas: +def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, lenient: bool = False) -> Cas: """Loads a CAS from a JSON source. Args: @@ -55,7 +55,7 @@ def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None) -> typesystem = TypeSystem() deserializer = CasJsonDeserializer() - return deserializer.deserialize(source, typesystem=typesystem) + return deserializer.deserialize(source, typesystem=typesystem, lenient=lenient) class CasJsonDeserializer: @@ -64,7 +64,7 @@ def __init__(self): self._max_sofa_num = 0 self._post_processors = [] - def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None) -> Cas: + def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None, lenient: bool = False) -> Cas: if isinstance(source, str): data = json.loads(source) else: @@ -97,7 +97,7 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = typesystem = merge_typesystems(typesystem, embedded_typesystem) - cas = Cas(typesystem=typesystem) + cas = Cas(typesystem=typesystem, lenient=lenient) feature_structures = {} json_feature_structures = data.get(FEATURE_STRUCTURES_FIELD) From c47bef517c18adaaae43fdd7b55a592ae4d37a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Baumg=C3=A4rtner?= Date: Wed, 3 May 2023 15:00:04 +0200 Subject: [PATCH 2/2] merge_typesystem --- cassis/json.py | 41 +++++++++++++++++++++-------------------- 1 file
changed, 21 insertions(+), 20 deletions(-) diff --git a/cassis/json.py b/cassis/json.py index 3fa8885..f52a480 100644 --- a/cassis/json.py +++ b/cassis/json.py @@ -37,7 +37,7 @@ NEGATIVE_INFINITE_VALUE_ABBR = "-Inf" -def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, lenient: bool = False) -> Cas: +def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, lenient: bool = False, merge_typesystem: bool =True) -> Cas: """Loads a CAS from a JSON source. Args: @@ -55,7 +55,7 @@ def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, le typesystem = TypeSystem() deserializer = CasJsonDeserializer() - return deserializer.deserialize(source, typesystem=typesystem, lenient=lenient) + return deserializer.deserialize(source, typesystem=typesystem, lenient=lenient, merge_typesystem=merge_typesystem) class CasJsonDeserializer: @@ -64,7 +64,7 @@ def __init__(self): self._max_sofa_num = 0 self._post_processors = [] - def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None, lenient: bool = False) -> Cas: + def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None, lenient: bool = False, merge_typesystem: bool =True) -> Cas: if isinstance(source, str): data = json.loads(source) else: @@ -74,28 +74,29 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = self._max_sofa_num = 0 self._post_processors = [] - json_typesystem = data.get(TYPES_FIELD) - embedded_typesystem = TypeSystem( - add_document_annotation_type=not (json_typesystem.get(FLAG_DOCUMENT_ANNOTATION)) - ) + if merge_typesystem: + json_typesystem = data.get(TYPES_FIELD) + embedded_typesystem = TypeSystem( + add_document_annotation_type=not (json_typesystem.get(FLAG_DOCUMENT_ANNOTATION)) + ) - # First, build a dependency graph to support cases where a child type is defined before its super type - type_dependencies = defaultdict(set) - for type_name, json_type in 
json_typesystem.items(): - type_dependencies[type_name].add(json_type[SUPER_TYPE_FIELD]) + # First, build a dependency graph to support cases where a child type is defined before its super type + type_dependencies = defaultdict(set) + for type_name, json_type in json_typesystem.items(): + type_dependencies[type_name].add(json_type[SUPER_TYPE_FIELD]) - # Second, load all the types but no features since features of a type X might be of a later loaded type Y - for type_name in toposort_flatten(type_dependencies): - if is_predefined(type_name) or embedded_typesystem.contains_type(type_name): - continue + # Second, load all the types but no features since features of a type X might be of a later loaded type Y + for type_name in toposort_flatten(type_dependencies): + if is_predefined(type_name) or embedded_typesystem.contains_type(type_name): + continue - self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name]) + self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name]) - # Now we are sure we know all the types, we can create the features - for type_name, json_type in json_typesystem.items(): - self._parse_features(embedded_typesystem, type_name, json_type) + # Now we are sure we know all the types, we can create the features + for type_name, json_type in json_typesystem.items(): + self._parse_features(embedded_typesystem, type_name, json_type) - typesystem = merge_typesystems(typesystem, embedded_typesystem) + typesystem = merge_typesystems(typesystem, embedded_typesystem) cas = Cas(typesystem=typesystem, lenient=lenient)