Skip to content

Commit

Permalink
merge_typesystem
Browse files: browse the repository at this point in the history
  • Loading branch information
timbmg committed May 3, 2023
1 parent d85f8ff commit c47bef5
Showing 1 changed file with 21 additions and 20 deletions.
41 changes: 21 additions & 20 deletions cassis/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
NEGATIVE_INFINITE_VALUE_ABBR = "-Inf"


def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, lenient: bool = False) -> Cas:
def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, lenient: bool = False, merge_typesystem: bool =True) -> Cas:
"""Loads a CAS from a JSON source.
Args:
Expand All @@ -55,7 +55,7 @@ def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None, le
typesystem = TypeSystem()

deserializer = CasJsonDeserializer()
return deserializer.deserialize(source, typesystem=typesystem, lenient=lenient)
return deserializer.deserialize(source, typesystem=typesystem, lenient=lenient, merge_typesystem=merge_typesystem)


class CasJsonDeserializer:
Expand All @@ -64,7 +64,7 @@ def __init__(self):
self._max_sofa_num = 0
self._post_processors = []

def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None, lenient: bool = False) -> Cas:
def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None, lenient: bool = False, merge_typesystem: bool =True) -> Cas:
if isinstance(source, str):
data = json.loads(source)
else:
Expand All @@ -74,28 +74,29 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] =
self._max_sofa_num = 0
self._post_processors = []

json_typesystem = data.get(TYPES_FIELD)
embedded_typesystem = TypeSystem(
add_document_annotation_type=not (json_typesystem.get(FLAG_DOCUMENT_ANNOTATION))
)
if merge_typesystem:
json_typesystem = data.get(TYPES_FIELD)
embedded_typesystem = TypeSystem(
add_document_annotation_type=not (json_typesystem.get(FLAG_DOCUMENT_ANNOTATION))
)

# First, build a dependency graph to support cases where a child type is defined before its super type
type_dependencies = defaultdict(set)
for type_name, json_type in json_typesystem.items():
type_dependencies[type_name].add(json_type[SUPER_TYPE_FIELD])
# First, build a dependency graph to support cases where a child type is defined before its super type
type_dependencies = defaultdict(set)
for type_name, json_type in json_typesystem.items():
type_dependencies[type_name].add(json_type[SUPER_TYPE_FIELD])

# Second, load all the types but no features since features of a type X might be of a later loaded type Y
for type_name in toposort_flatten(type_dependencies):
if is_predefined(type_name) or embedded_typesystem.contains_type(type_name):
continue
# Second, load all the types but no features since features of a type X might be of a later loaded type Y
for type_name in toposort_flatten(type_dependencies):
if is_predefined(type_name) or embedded_typesystem.contains_type(type_name):
continue

self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name])
self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name])

# Now we are sure we know all the types, we can create the features
for type_name, json_type in json_typesystem.items():
self._parse_features(embedded_typesystem, type_name, json_type)
# Now we are sure we know all the types, we can create the features
for type_name, json_type in json_typesystem.items():
self._parse_features(embedded_typesystem, type_name, json_type)

typesystem = merge_typesystems(typesystem, embedded_typesystem)
typesystem = merge_typesystems(typesystem, embedded_typesystem)

cas = Cas(typesystem=typesystem, lenient=lenient)

Expand Down

0 comments on commit c47bef5

Please sign in to comment.