From 4aed3f21abb04e9ba838343091621da55c0d3c4f Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 12:30:24 +0200
Subject: [PATCH 01/22] #168 - Experimental JSON CAS support

- Added very basic JSON CAS support
- No support for type systems yet
- No support for lenient loading
- Remove Cas:NULL via type name instead of simply purging the FS with ID 0 (which may not be a Cas:NULL fs)
- Added various constants for type names and feature names in the Cas class (analogous to the Apache UIMA Java SDK impl)
- WIP
---
 .gitignore                                    |   2 +
 cassis/__init__.py                            |   2 +
 cassis/cas.py                                 |  99 ++++++-
 cassis/json.py                                | 273 ++++++++++++++++++
 cassis/typesystem.py                          |  10 +-
 tests/test_files/json/README.md               |   5 +
 .../ser-ref/casWithSofaDataArray/data.json    |  21 ++
 .../casWithSofaDataArray/debug-typesystem.xml |  17 ++
 .../ser-ref/casWithSofaDataArray/debug.xmi    |   5 +
 .../ser-ref/casWithSofaDataURI/data.json      |  17 ++
 .../casWithSofaDataURI/debug-typesystem.xml   |  17 ++
 .../ser-ref/casWithSofaDataURI/debug.xmi      |   4 +
 .../fs_as_array/ser-ref/casWithText/data.json |  24 ++
 .../ser-ref/casWithText/debug-typesystem.xml  |  17 ++
 .../fs_as_array/ser-ref/casWithText/debug.xmi |   6 +
 .../casWithTextAndAnnotation/data.json        |  30 ++
 .../debug-typesystem.xml                      |  17 ++
 .../casWithTextAndAnnotation/debug.xmi        |   7 +
 tests/test_json.py                            |  26 ++
 tests/util.py                                 |  34 +++
 20 files changed, 628 insertions(+), 5 deletions(-)
 create mode 100644 cassis/json.py
 create mode 100644 tests/test_files/json/README.md
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithText/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithText/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
 create mode 100644 tests/test_json.py

diff --git a/.gitignore b/.gitignore
index 0d13f2a..e2de877 100644
--- a/.gitignore
+++ b/.gitignore
@@ -221,3 +221,5 @@ expected.xml
 difference.diff
 
 xml_issue.py
+actual.json
+expected.json
diff --git a/cassis/__init__.py b/cassis/__init__.py
index 651988c..4d90bba 100644
--- a/cassis/__init__.py
+++ b/cassis/__init__.py
@@ -1,6 +1,7 @@
 """UIMA CAS processing library in Python."""
 
 from .cas import Cas, Sofa, View
+from .json import load_cas_from_json
 from .typesystem import TypeSystem, load_dkpro_core_typesystem, load_typesystem, merge_typesystems
 from .xmi import load_cas_from_xmi
 
@@ -13,4 +14,5 @@
     "load_dkpro_core_typesystem",
     "merge_typesystems",
     "load_cas_from_xmi",
+    "load_cas_from_json",
 ]
diff --git a/cassis/cas.py b/cassis/cas.py
index 10c06b6..d56abae 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -77,6 +77,9 @@ def cassis_to_uima(self, idx: Optional[int]) -> Optional[int]:
 class Sofa:
     """Each CAS has one or more Subject of Analysis (SofA)"""
 
+    #: str: The type
+    type = "uima.cas.Sofa"
+
     #: int: The sofaNum
     sofaNum = attr.ib(validator=validators.instance_of(int))
 
@@ -95,6 +98,9 @@ class Sofa:
     #: str: The sofa URI, it references remote sofa data
     sofaURI = attr.ib(default=None, validator=_validator_optional_string)
 
+    #: The sofa data byte array (a uima.cas.ByteArray feature structure)
+    sofaArray = attr.ib(default=None)
+
     #: OffsetConverter: Converts from UIMA UTF-16 based offsets to Unicode codepoint offsets and back
     _offset_converter = attr.ib(factory=OffsetConverter, eq=False, hash=False)
 
@@ -171,6 +177,40 @@ def __init__(self, typesystem: TypeSystem):
 class Cas:
     """A CAS object is a container for text (sofa) and annotations"""
 
+    NAME_SPACE_UIMA_CAS = "uima" + TypeSystem.NAMESPACE_SEPARATOR + "cas"
+    UIMA_CAS_PREFIX = NAME_SPACE_UIMA_CAS + TypeSystem.NAMESPACE_SEPARATOR
+    TYPE_NAME_TOP = UIMA_CAS_PREFIX + "TOP"
+    TYPE_NAME_INTEGER = UIMA_CAS_PREFIX + "Integer"
+    TYPE_NAME_FLOAT = UIMA_CAS_PREFIX + "Float"
+    TYPE_NAME_STRING = UIMA_CAS_PREFIX + "String"
+    TYPE_NAME_BOOLEAN = UIMA_CAS_PREFIX + "Boolean"
+    TYPE_NAME_BYTE = UIMA_CAS_PREFIX + "Byte"
+    TYPE_NAME_SHORT = UIMA_CAS_PREFIX + "Short"
+    TYPE_NAME_LONG = UIMA_CAS_PREFIX + "Long"
+    TYPE_NAME_DOUBLE = UIMA_CAS_PREFIX + "Double"
+    TYPE_NAME_ARRAY_BASE = UIMA_CAS_PREFIX + "ArrayBase"
+    TYPE_NAME_FS_ARRAY = UIMA_CAS_PREFIX + "FSArray"
+    TYPE_NAME_INTEGER_ARRAY = UIMA_CAS_PREFIX + "IntegerArray"
+    TYPE_NAME_FLOAT_ARRAY = UIMA_CAS_PREFIX + "FloatArray"
+    TYPE_NAME_STRING_ARRAY = UIMA_CAS_PREFIX + "StringArray"
+    TYPE_NAME_BOOLEAN_ARRAY = UIMA_CAS_PREFIX + "BooleanArray"
+    TYPE_NAME_BYTE_ARRAY = UIMA_CAS_PREFIX + "ByteArray"
+    TYPE_NAME_SHORT_ARRAY = UIMA_CAS_PREFIX + "ShortArray"
+    TYPE_NAME_LONG_ARRAY = UIMA_CAS_PREFIX + "LongArray"
+    TYPE_NAME_DOUBLE_ARRAY = UIMA_CAS_PREFIX + "DoubleArray"
+    TYPE_NAME_FS_HASH_SET = UIMA_CAS_PREFIX + "FSHashSet"
+    TYPE_NAME_SOFA = UIMA_CAS_PREFIX + "Sofa"
+    TYPE_NAME_ANNOTATION_BASE = UIMA_CAS_PREFIX + "AnnotationBase"
+
+    FEATURE_BASE_NAME_SOFANUM = "sofaNum"
+    FEATURE_BASE_NAME_SOFAID = "sofaID"
+    FEATURE_BASE_NAME_SOFAMIME = "mimeType"
+    FEATURE_BASE_NAME_SOFAURI = "sofaURI"
+    FEATURE_BASE_NAME_SOFASTRING = "sofaString"
+    FEATURE_BASE_NAME_SOFAARRAY = "sofaArray"
+
+    NAME_DEFAULT_SOFA = "_InitialView"
+
     def __init__(self, typesystem: TypeSystem = None, lenient: bool = False):
         """ Creates a CAS with the specified typesystem. If no typesystem is given, then the default one
         is used which only contains UIMA-predefined types.
@@ -321,6 +361,7 @@ def get_covered_text(self, annotation: FeatureStructure) -> str:
 
     def select(self, type_name: str) -> List[FeatureStructure]:
         """ Finds all annotations of type `type_name`.
+        """Finds all annotations of type `type_name`.
 
         Args:
             type_name: The name of the type whose annotation instances are to be found
@@ -492,14 +533,33 @@ def sofa_uri(self) -> str:
 
     @sofa_uri.setter
     def sofa_uri(self, value: str):
-        """ Sets the sofa URI to `value`.
+        """Sets the sofa URI to `value`.
 
         Args:
-            value: The new sofa MIME type.
+            value: The new sofa URI.
 
         """
         self.get_sofa().sofaURI = value
 
+    @property
+    def sofa_array(self) -> str:
+        """The sofa byte array references a ByteArrayFS feature structure
+
+        Returns: The sofa data byte array.
+
+        """
+        return self.get_sofa().sofaArray
+
+    @sofa_array.setter
+    def sofa_array(self, value: "uima_cas_ByteArrayFS"):
+        """Sets the sofa byte array to the given ByteArrayFS feature structure.
+
+        Args:
+            value: The new sofa byte array feature structure.
+
+        """
+        self.get_sofa().sofaArray = value
+
     def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False) -> Optional[str]:
         """Creates a XMI representation of this CAS.
 
@@ -514,8 +574,36 @@ def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False
         """
         from cassis.xmi import CasXmiSerializer
 
-        serializer = CasXmiSerializer()
+        return self.serialize(CasXmiSerializer(), path, pretty_print)
+
+    def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = False) -> Optional[str]:
+        """Creates a JSON representation of this CAS.
 
+        Args:
+            path: File path, if `None` is provided the result is returned as a string
+            pretty_print: `True` if the resulting JSON should be pretty-printed, else `False`
+
+
+        Returns:
+            If `path` is None, then the JSON representation of this CAS is returned as a string
+
+        """
+        from cassis.json import CasJsonSerializer
+
+        return self.serialize(CasJsonSerializer(), path, pretty_print)
+
+    def serialize(self, serializer, path: Union[str, Path, None] = None, pretty_print: bool = False):
+        """Runs this CAS through the given serializer.
+
+        Args:
+            path: File path, if `None` is provided the result is returned as a string
+            pretty_print: `True` if the resulting data should be pretty-printed, else `False`
+
+
+        Returns:
+            If `path` is None, then the data representation of this CAS is returned as a string
+
+        """
         # If `path` is None, then serialize to a string and return it
         if path is None:
             sink = BytesIO()
@@ -591,7 +679,10 @@ def _find_all_fs(self) -> Iterable[FeatureStructure]:
                         openlist.append(referenced_fs)
 
         # We do not want to return cas:NULL here as we handle serializing it later
-        all_fs.pop(0, None)
+        for fs_id, fs in list(all_fs.items()):
+            if fs.type == "uima.cas.NULL":
+                all_fs.pop(fs_id)
+
         yield from all_fs.values()
 
     def _get_next_xmi_id(self) -> int:
diff --git a/cassis/json.py b/cassis/json.py
new file mode 100644
index 0000000..b0bd9d3
--- /dev/null
+++ b/cassis/json.py
@@ -0,0 +1,273 @@
+import base64
+import json
+import warnings
+from collections import OrderedDict, defaultdict
+from io import BytesIO, TextIOWrapper
+from typing import IO, Dict, Iterable, List, Optional, Set, Union
+
+import attr
+from lxml import etree
+
+from cassis.cas import Cas, IdGenerator, Sofa, View
+from cassis.typesystem import FeatureStructure, TypeNotFoundError, TypeSystem
+
+RESERVED_FIELD_PREFIX = "%"
+TYPE_FIELD = RESERVED_FIELD_PREFIX + "TYPE"
+RANGE_FIELD = RESERVED_FIELD_PREFIX + "RANGE"
+TYPES_FIELD = RESERVED_FIELD_PREFIX + "TYPES"
+FEATURES_FIELD = RESERVED_FIELD_PREFIX + "FEATURES"
+VIEWS_FIELD = RESERVED_FIELD_PREFIX + "VIEWS"
+VIEW_SOFA_FIELD = RESERVED_FIELD_PREFIX + "SOFA"
+VIEW_INDEX_FIELD = RESERVED_FIELD_PREFIX + "INDEX"
+FEATURE_STRUCTURES_FIELD = RESERVED_FIELD_PREFIX + "FEATURE_STRUCTURES"
+REF_FEATURE_PREFIX = "@"
+NAME_FIELD = RESERVED_FIELD_PREFIX + "NAME"
+SUPER_TYPE_FIELD = RESERVED_FIELD_PREFIX + "SUPER_TYPE"
+ELEMENT_TYPE_FIELD = RESERVED_FIELD_PREFIX + "ELEMENT_TYPE"
+ID_FIELD = RESERVED_FIELD_PREFIX + "ID"
+FLAGS_FIELD = RESERVED_FIELD_PREFIX + "FLAGS"
+FLAG_DOCUMENT_ANNOTATION = "DocumentAnnotation"
+ARRAY_SUFFIX = "[]"
+ELEMENTS_FIELD = RESERVED_FIELD_PREFIX + "ELEMENTS"
+
+
+def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None) -> Cas:
+    """Loads a CAS from a JSON source.
+
+    Args:
+        source: The JSON source. If `source` is a string, then it is assumed to be a JSON string.
+            If `source` is a file-like object, then the data is read from it.
+        typesystem: The type system that belongs to this CAS. If `None`, an empty type system is provided.
+        (Note: there is no `lenient` parameter yet; lenient loading, i.e. ignoring unknown
+            types, is not supported by the JSON loader.)
+
+    Returns:
+        The deserialized CAS
+
+    """
+    if typesystem is None:
+        typesystem = TypeSystem()
+
+    deserializer = CasJsonDeserializer()
+    return deserializer.deserialize(source, typesystem=typesystem)
+
+
+class CasJsonDeserializer:
+    def __init__(self):
+        self._max_xmi_id = 0
+        self._max_sofa_num = 0
+        self._post_processors = []
+
+    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem) -> Cas:
+        if isinstance(source, str):
+            data = json.loads(source)
+        else:
+            data = json.load(source)
+
+        feature_structures = {}
+
+        self._max_xmi_id = 0
+        self._max_sofa_num = 0
+        self._post_processors = []
+
+        data.get(TYPES_FIELD)  # FIXME
+
+        cas = Cas(typesystem=typesystem)
+
+        json_feature_structures = data.get(FEATURE_STRUCTURES_FIELD)
+        if isinstance(json_feature_structures, list):
+            for json_fs in json_feature_structures:
+                if json_fs.get(TYPE_FIELD) == Cas.TYPE_NAME_SOFA:
+                    fs_id = json_fs.get(ID_FIELD)
+                    fs = self._parse_sofa(cas, fs_id, json_fs, feature_structures)
+                else:
+                    fs_id = json_fs.get(ID_FIELD)
+                    fs = self._parse_feature_structure(typesystem, fs_id, json_fs, feature_structures)
+                feature_structures[fs.xmiID] = fs
+
+        if isinstance(json_feature_structures, dict):
+            for fs_id, json_fs in json_feature_structures.items():
+                if json_fs.get(TYPE_FIELD) == Cas.TYPE_NAME_SOFA:
+                    fs_id = int(fs_id)
+                    fs = self._parse_sofa(cas, fs_id, json_fs, feature_structures)
+                else:
+                    fs_id = int(fs_id)
+                    fs = self._parse_feature_structure(typesystem, fs_id, json_fs, feature_structures)
+                feature_structures[fs.xmiID] = fs
+
+        for post_processor in self._post_processors:
+            post_processor()
+
+        cas._xmi_id_generator = IdGenerator(self._max_xmi_id + 1)
+        cas._sofa_num_generator = IdGenerator(self._max_sofa_num + 1)
+
+        # At this point all views for which we have a sofa with a known ID and sofaNum have already been created
+        # as part of parsing the feature structures. Thus, if there are any views remaining that are only declared
+        # in the views section, we just create them with auto-assigned IDs
+        json_views = data.get(VIEWS_FIELD)
+        for view_name, json_view in json_views.items():
+            self._parse_view(cas, view_name, json_view, feature_structures)
+
+        return cas
+
+    def _get_or_create_view(
+        self, cas: Cas, view_name: str, fs_id: Optional[int] = None, sofa_num: Optional[int] = None
+    ) -> Cas:
+        if view_name == Cas.NAME_DEFAULT_SOFA:
+            view = cas.get_view(Cas.NAME_DEFAULT_SOFA)
+
+            # We need to make sure that the sofa gets the real xmi, see #155
+            if fs_id is not None:
+                view.get_sofa().xmiID = fs_id
+
+            return view
+        else:
+            return cas.create_view(view_name, xmiID=fs_id, sofaNum=sofa_num)
+
+    def _parse_view(self, cas: Cas, view_name: str, json_view: Dict[str, any], feature_structures: List):
+        view = self._get_or_create_view(cas, view_name)
+        for member_id in json_view[VIEW_INDEX_FIELD]:
+            fs = feature_structures[member_id]
+            view.add_annotation(fs, keep_id=True)
+
+    def _parse_sofa(self, cas: Cas, fs_id: int, json_fs: Dict[str, any], feature_structures: Dict[int, any]) -> Sofa:
+        view = self._get_or_create_view(
+            cas, json_fs.get(Cas.FEATURE_BASE_NAME_SOFAID), fs_id, json_fs.get(Cas.FEATURE_BASE_NAME_SOFANUM)
+        )
+
+        view.sofa_string = json_fs.get(Cas.FEATURE_BASE_NAME_SOFASTRING)
+        view.sofa_mime = json_fs.get(Cas.FEATURE_BASE_NAME_SOFAMIME)
+        view.sofa_uri = json_fs.get(Cas.FEATURE_BASE_NAME_SOFAURI)
+        view.sofa_array = feature_structures.get(json_fs.get(REF_FEATURE_PREFIX + Cas.FEATURE_BASE_NAME_SOFAARRAY))
+
+        return view.get_sofa()
+
+    def _parse_feature_structure(
+        self, typesystem: TypeSystem, fs_id: int, json_fs: Dict[str, any], feature_structures: Dict[int, any]
+    ):
+        AnnotationType = typesystem.get_type(json_fs.get(TYPE_FIELD))
+
+        attributes = dict(json_fs)
+
+        # Map the JSON FS ID to xmiID
+        attributes["xmiID"] = fs_id
+
+        # Remap features that use a reserved Python name
+        if "self" in attributes:
+            attributes["self_"] = attributes.pop("self")
+
+        if "type" in attributes:
+            attributes["type_"] = attributes.pop("type")
+
+        if AnnotationType.name == Cas.TYPE_NAME_BYTE_ARRAY:
+            attributes["elements"] = base64.b64decode(attributes.get(ELEMENTS_FIELD))
+
+        self._resolve_references(attributes, feature_structures)
+        self._strip_reserved_json_keys(attributes)
+
+        self._max_xmi_id = max(attributes["xmiID"], self._max_xmi_id)
+        return AnnotationType(**attributes)
+
+    def _resolve_references(self, attributes: Dict[str, any], feature_structures: Dict[int, any]):
+        for key, value in list(attributes.items()):
+            if key.startswith(REF_FEATURE_PREFIX):
+                attributes.pop(key)
+                feature_name = key[1:]
+                target_fs = feature_structures.get(value)
+                if target_fs:
+                    # Resolve id-ref now
+                    attributes[feature_name] = target_fs
+                else:
+                    # Resolve id-ref at the end of processing
+                    def fix_up():
+                        attributes[feature_name] = feature_structures.get(value)
+
+                    self._post_processors.append(fix_up)
+
+    def _strip_reserved_json_keys(
+        self,
+        attributes: Dict[str, any],
+    ):
+        for key in list(attributes):
+            if key.startswith(RESERVED_FIELD_PREFIX):
+                attributes.pop(key)
+
+
+class CasJsonSerializer:
+    _COMMON_FIELD_NAMES = {"xmiID", "type"}
+
+    def __init__(self):
+        pass
+
+    def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
+        data = {}
+        types = data[TYPES_FIELD] = {}
+        views = data[VIEWS_FIELD] = {}
+        feature_structures = data[FEATURE_STRUCTURES_FIELD] = []
+
+        for view in cas.views:
+            views[view.sofa.sofaID] = self._serialize_view(view)
+            if view.sofa.sofaArray:
+                json_sofa_array_fs = self._serialize_feature_structure(cas, view.sofa.sofaArray)
+                feature_structures.append(json_sofa_array_fs)
+            json_sofa_fs = self._serialize_feature_structure(cas, view.sofa)
+            feature_structures.append(json_sofa_fs)
+
+        # Find all fs, even the ones that are not directly added to a sofa
+        for fs in sorted(cas._find_all_fs(), key=lambda a: a.xmiID):
+            json_fs = self._serialize_feature_structure(cas, fs)
+            feature_structures.append(json_fs)
+
+        if isinstance(sink, BytesIO):
+            sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
+
+        if sink:
+            json.dump(data, sink, sort_keys=False)
+        else:
+            json.dumps(data, sort_keys=False)
+
+        if isinstance(sink, TextIOWrapper):
+            sink.detach()  # Prevent TextIOWrapper from closing the BytesIO
+
+    def _serialize_feature_structure(self, cas, fs) -> dict:
+        json_fs = OrderedDict()
+        json_fs[ID_FIELD] = fs.xmiID
+        json_fs[TYPE_FIELD] = fs.type
+
+        ts = cas.typesystem
+        t = ts.get_type(fs.type)
+        for feature in t.all_features:
+            if feature.name in CasJsonSerializer._COMMON_FIELD_NAMES:
+                continue
+
+            feature_name = feature.name
+
+            # Strip the underscore we added for reserved names
+            if feature._has_reserved_name:
+                feature_name = feature.name[:-1]
+
+            # Skip over 'None' features
+            value = getattr(fs, feature.name)
+            if value is None:
+                continue
+
+            # Map back from offsets in Unicode codepoints to UIMA UTF-16 based offsets
+            # if ts.is_instance_of(fs.type, "uima.tcas.Annotation") and feature_name == "begin" or feature_name == "end":
+            #    sofa: Sofa = getattr(fs, "sofa")
+            #    value = sofa._offset_converter.cassis_to_uima(value)
+
+            if t.name == Cas.TYPE_NAME_BYTE_ARRAY and feature_name == "elements":
+                json_fs[ELEMENTS_FIELD] = base64.b64encode(value).decode("ascii")
+            elif t.supertypeName == Cas.TYPE_NAME_ARRAY_BASE and feature_name == "elements":
+                json_fs[ELEMENTS_FIELD] = value
+            elif ts.is_primitive(feature.rangeTypeName):
+                json_fs[feature_name] = value
+            elif ts.is_collection(fs.type, feature):
+                json_fs[REF_FEATURE_PREFIX + feature_name] = value.xmiID
+            else:
+                # We need to encode non-primitive features as a reference
+                json_fs[REF_FEATURE_PREFIX + feature_name] = value.xmiID
+        return json_fs
+
+    def _serialize_view(self, view: View):
+        return {VIEW_SOFA_FIELD: view.sofa.xmiID, VIEW_INDEX_FIELD: sorted(x.xmiID for x in view.get_all_annotations())}
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index 38e0d1e..e846e37 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -349,6 +349,8 @@ def descendants(self) -> Iterator["Type"]:
 
 
 class TypeSystem:
+    NAMESPACE_SEPARATOR = "."
+
     def __init__(self, add_document_annotation_type: bool = True):
         self._types = {}
 
@@ -424,7 +426,13 @@ def __init__(self, add_document_annotation_type: bool = True):
         self.add_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer")
         self.add_feature(t, name="sofaID", rangeTypeName="uima.cas.String")
         self.add_feature(t, name="mimeType", rangeTypeName="uima.cas.String")
-        self.add_feature(t, name="sofaArray", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
+        self.add_feature(
+            t,
+            name="sofaArray",
+            rangeTypeName="uima.cas.ByteArray",
+            elementType="uima.cas.Byte",
+            multipleReferencesAllowed=True,
+        )
         self.add_feature(t, name="sofaString", rangeTypeName="uima.cas.String")
         self.add_feature(t, name="sofaURI", rangeTypeName="uima.cas.String")
 
diff --git a/tests/test_files/json/README.md b/tests/test_files/json/README.md
new file mode 100644
index 0000000..483853a
--- /dev/null
+++ b/tests/test_files/json/README.md
@@ -0,0 +1,5 @@
+Test files in this folder were sourced from
+
+https://github.com/apache/uima-uimaj/tree/feature/UIMA-6266-Clean-JSON-Wire-Format-for-CAS/uimaj-json/src/test/resources/CasSerializationDeserialization_JsonCas2_FsAsArray_Test/ser-ref
+
+Apache License 2.0
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
new file mode 100644
index 0000000..b732eaf
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
@@ -0,0 +1,21 @@
+{
+  "%TYPES" : { },
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 0,
+      "%INDEX" : [ ]
+    }
+  },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.ByteArray",
+    "%ELEMENTS" : "VGhpcyBpcyBhIHRlc3Q="
+  }, {
+    "%ID" : 0,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text/plain",
+    "@sofaArray" : 1
+  } ]
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug.xmi
new file mode 100644
index 0000000..89075f6
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/debug.xmi
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="2" sofaNum="1" sofaID="_InitialView" mimeType="text/plain" sofaArray="1"/>
+    <cas:ByteArray xmi:id="1" elements="5468697320697320612074657374"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
new file mode 100644
index 0000000..678d9e2
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
@@ -0,0 +1,17 @@
+{
+  "%TYPES" : { },
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 0,
+      "%INDEX" : [ ]
+    }
+  },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 0,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text/plain",
+    "sofaURI" : "classpath:/ProgrammaticallyCreatedCasDataSuite/document.txt"
+  } ]
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug.xmi
new file mode 100644
index 0000000..89966e0
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/debug.xmi
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text/plain" sofaURI="classpath:/ProgrammaticallyCreatedCasDataSuite/document.txt"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
new file mode 100644
index 0000000..416e6e1
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
@@ -0,0 +1,24 @@
+{
+  "%TYPES" : { },
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 0,
+      "%INDEX" : [ 1 ]
+    }
+  },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 0,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "This is a test."
+  }, {
+    "%ID" : 1,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 0,
+    "begin" : 0,
+    "end" : 15,
+    "language" : "x-unspecified"
+  } ]
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithText/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithText/debug.xmi
new file mode 100644
index 0000000..943df5f
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/debug.xmi
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="15" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="This is a test."/>
+    <cas:View sofa="1" members="2"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
new file mode 100644
index 0000000..aa71704
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
@@ -0,0 +1,30 @@
+{
+  "%TYPES" : { },
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 0,
+      "%INDEX" : [ 1, 2 ]
+    }
+  },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 0,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "This is a test."
+  }, {
+    "%ID" : 1,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 0,
+    "begin" : 0,
+    "end" : 15,
+    "language" : "x-unspecified"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 0,
+    "begin" : 0,
+    "end" : 15
+  } ]
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
new file mode 100644
index 0000000..7292031
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="15" language="x-unspecified"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="0" end="15"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="This is a test."/>
+    <cas:View sofa="1" members="2 3"/>
+</xmi:XMI>
diff --git a/tests/test_json.py b/tests/test_json.py
new file mode 100644
index 0000000..a7aaaab
--- /dev/null
+++ b/tests/test_json.py
@@ -0,0 +1,26 @@
+import json
+
+from tests.fixtures import *
+from tests.util import assert_json_equal
+
+FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files")
+
+FIXTURES = [
+    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithSofaDataArray")),
+    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithSofaDataURI")),
+    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithText")),
+    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithTextAndAnnotation")),
+]
+
+
+@pytest.mark.parametrize("json_path", FIXTURES)
+def test_deserialization_serialization(json_path):
+    with open(os.path.join(json_path, "data.json"), "rb") as f:
+        cas = load_cas_from_json(f)
+
+    with open(os.path.join(json_path, "data.json"), "rb") as f:
+        expected_json = json.load(f)
+
+    actual_json = cas.to_json()
+
+    assert_json_equal(actual_json, expected_json)
diff --git a/tests/util.py b/tests/util.py
index 7dbd925..8a7d780 100644
--- a/tests/util.py
+++ b/tests/util.py
@@ -1,4 +1,5 @@
 import difflib
+import json
 from typing import IO, Union
 
 import lxml_asserts
@@ -39,6 +40,39 @@ def assert_xml_equal(actual: Union[IO, str], expected: Union[IO, str]):
         raise e
 
 
+def assert_json_equal(actual: str, expected: Union[IO, str]):
+    """Checks whether the JSON trees behind `actual` and `expected` are equal.
+
+    Args:
+        actual: The actual JSON
+        expected: The expected JSON
+
+    Throws:
+        AssertionError when json(actual) != json(expected)
+    """
+    actual_json = json.dumps(json.loads(actual), sort_keys=True, indent=2)
+    expected_json = json.dumps(expected, sort_keys=True, indent=2)
+
+    try:
+        assert actual_json == expected_json
+    except AssertionError as e:
+        # For debugging purposes, the trees are saved to later inspect their contents
+        with open("actual.json", "w") as f:
+            f.write(actual_json)
+
+        with open("expected.json", "w") as f:
+            f.write(expected_json)
+
+        with open("difference.diff", "w") as f:
+            diff = difflib.unified_diff(
+                actual_json.splitlines(), expected_json.splitlines(), fromfile="Actual", tofile="Expected"
+            )
+            diff_string = "\n".join(diff)
+            f.write(diff_string)
+
+        raise e
+
+
 def _to_etree(source: Union[IO, str]) -> etree.Element:
     parser = etree.XMLParser(remove_blank_text=True)
 

From 79db38bf6984c5696212ed7e891a3a76f6b1612a Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 12:33:44 +0200
Subject: [PATCH 02/22] #168 - Experimental JSON CAS support

- Fixed bad PyDoc comment
---
 cassis/cas.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cassis/cas.py b/cassis/cas.py
index d56abae..3bc34e4 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -360,7 +360,6 @@ def get_covered_text(self, annotation: FeatureStructure) -> str:
         return sofa.sofaString[annotation.begin : annotation.end]
 
     def select(self, type_name: str) -> List[FeatureStructure]:
-        """ Finds all annotations of type `type_name`.
         """Finds all annotations of type `type_name`.
 
         Args:

From c47b2badf6e16b62bb600b5b413bbba62f578b55 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 12:37:03 +0200
Subject: [PATCH 03/22] #168 - Experimental JSON CAS support

- Fixed linter error caused by a type hint referring to a dynamically created type
---
 cassis/cas.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cassis/cas.py b/cassis/cas.py
index 323b3cb..01bc22c 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -542,7 +542,7 @@ def sofa_uri(self, value: str):
 
     @property
     def sofa_array(self) -> str:
-        """The sofa byte array references a ByteArrayFS feature structure
+        """The sofa byte array references a uima.cas.ByteArray feature structure
 
         Returns: The sofa data byte array.
 
@@ -550,11 +550,11 @@ def sofa_array(self) -> str:
         return self.get_sofa().sofaArray
 
     @sofa_array.setter
-    def sofa_array(self, value: "uima_cas_ByteArrayFS"):
-        """Sets the sofa byte array to the given ByteArrayFS feature structure.
+    def sofa_array(self, value):
+        """Sets the sofa byte array to the given uima.cas.ByteArray feature structure.
 
         Args:
-            value: The new sofa byte array type.
+            value: The new sofa byte array feature structure.
 
         """
         self.get_sofa().sofaArray = value

From 8239fe55eec7011253a525884b71843a71ab128e Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 12:45:11 +0200
Subject: [PATCH 04/22] #168 - Experimental JSON CAS support

- Roll back change of Sofa.sofaArray range type from uima.cas.ByteArray back to uima.cas.TOP which is indeed the range type also used in the Apache UIMA Java SDK - despite only uima.cas.ByteArray being acceptable...
---
 cassis/typesystem.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index da77e64..fabed0f 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -426,13 +426,7 @@ def __init__(self, add_document_annotation_type: bool = True):
         self.add_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer")
         self.add_feature(t, name="sofaID", rangeTypeName="uima.cas.String")
         self.add_feature(t, name="mimeType", rangeTypeName="uima.cas.String")
-        self.add_feature(
-            t,
-            name="sofaArray",
-            rangeTypeName="uima.cas.ByteArray",
-            elementType="uima.cas.Byte",
-            multipleReferencesAllowed=True,
-        )
+        self.add_feature(t, name="sofaArray", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
         self.add_feature(t, name="sofaString", rangeTypeName="uima.cas.String")
         self.add_feature(t, name="sofaURI", rangeTypeName="uima.cas.String")
 

From d1177581cb322c1090c4c323c219b429b4081b61 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 16:05:13 +0200
Subject: [PATCH 05/22] #168 - Experimental JSON CAS support

- Added generator for random CASes
- Added JSON tests using random CAS generator
- Added support for (de)serializing type system information in the JSON format
- Moved the type/feature name constants from Cas to typesystem.py
---
 cassis/cas.py                           |  40 +-------
 cassis/json.py                          | 122 ++++++++++++++++++------
 cassis/typesystem.py                    |  53 +++++++++-
 tests/test_files/test_cas_generators.py |  42 ++++++++
 tests/test_json.py                      |  16 ++++
 tests/util.py                           |   8 +-
 6 files changed, 213 insertions(+), 68 deletions(-)
 create mode 100644 tests/test_files/test_cas_generators.py

diff --git a/cassis/cas.py b/cassis/cas.py
index 01bc22c..ca74543 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -177,40 +177,6 @@ def __init__(self, typesystem: TypeSystem):
 class Cas:
     """A CAS object is a container for text (sofa) and annotations"""
 
-    NAME_SPACE_UIMA_CAS = "uima" + TypeSystem.NAMESPACE_SEPARATOR + "cas"
-    UIMA_CAS_PREFIX = NAME_SPACE_UIMA_CAS + TypeSystem.NAMESPACE_SEPARATOR
-    TYPE_NAME_TOP = UIMA_CAS_PREFIX + "TOP"
-    TYPE_NAME_INTEGER = UIMA_CAS_PREFIX + "Integer"
-    TYPE_NAME_FLOAT = UIMA_CAS_PREFIX + "Float"
-    TYPE_NAME_STRING = UIMA_CAS_PREFIX + "String"
-    TYPE_NAME_BOOLEAN = UIMA_CAS_PREFIX + "Boolean"
-    TYPE_NAME_BYTE = UIMA_CAS_PREFIX + "Byte"
-    TYPE_NAME_SHORT = UIMA_CAS_PREFIX + "Short"
-    TYPE_NAME_LONG = UIMA_CAS_PREFIX + "Long"
-    TYPE_NAME_DOUBLE = UIMA_CAS_PREFIX + "Double"
-    TYPE_NAME_ARRAY_BASE = UIMA_CAS_PREFIX + "ArrayBase"
-    TYPE_NAME_FS_ARRAY = UIMA_CAS_PREFIX + "FSArray"
-    TYPE_NAME_INTEGER_ARRAY = UIMA_CAS_PREFIX + "IntegerArray"
-    TYPE_NAME_FLOAT_ARRAY = UIMA_CAS_PREFIX + "FloatArray"
-    TYPE_NAME_STRING_ARRAY = UIMA_CAS_PREFIX + "StringArray"
-    TYPE_NAME_BOOLEAN_ARRAY = UIMA_CAS_PREFIX + "BooleanArray"
-    TYPE_NAME_BYTE_ARRAY = UIMA_CAS_PREFIX + "ByteArray"
-    TYPE_NAME_SHORT_ARRAY = UIMA_CAS_PREFIX + "ShortArray"
-    TYPE_NAME_LONG_ARRAY = UIMA_CAS_PREFIX + "LongArray"
-    TYPE_NAME_DOUBLE_ARRAY = UIMA_CAS_PREFIX + "DoubleArray"
-    TYPE_NAME_FS_HASH_SET = UIMA_CAS_PREFIX + "FSHashSet"
-    TYPE_NAME_SOFA = UIMA_CAS_PREFIX + "Sofa"
-    TYPE_NAME_ANNOTATION_BASE = UIMA_CAS_PREFIX + "AnnotationBase"
-
-    FEATURE_BASE_NAME_SOFANUM = "sofaNum"
-    FEATURE_BASE_NAME_SOFAID = "sofaID"
-    FEATURE_BASE_NAME_SOFAMIME = "mimeType"
-    FEATURE_BASE_NAME_SOFAURI = "sofaURI"
-    FEATURE_BASE_NAME_SOFASTRING = "sofaString"
-    FEATURE_BASE_NAME_SOFAARRAY = "sofaArray"
-
-    NAME_DEFAULT_SOFA = "_InitialView"
-
     def __init__(self, typesystem: TypeSystem = None, lenient: bool = False):
         """Creates a CAS with the specified typesystem. If no typesystem is given, then the default one
         is used which only contains UIMA-predefined types.
@@ -573,7 +539,7 @@ def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False
         """
         from cassis.xmi import CasXmiSerializer
 
-        return self.serialize(CasXmiSerializer(), path, pretty_print)
+        return self._serialize(CasXmiSerializer(), path, pretty_print)
 
     def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = False) -> Optional[str]:
         """Creates a JSON representation of this CAS.
@@ -589,9 +555,9 @@ def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = Fals
         """
         from cassis.json import CasJsonSerializer
 
-        return self.serialize(CasJsonSerializer(), path, pretty_print)
+        return self._serialize(CasJsonSerializer(), path, pretty_print)
 
-    def serialize(self, serializer, path: Union[str, Path, None] = None, pretty_print: bool = False):
+    def _serialize(self, serializer, path: Union[str, Path, None] = None, pretty_print: bool = False):
         """Runs this CAS through the given serializer.
 
         Args:
diff --git a/cassis/json.py b/cassis/json.py
index b0bd9d3..34cac0e 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -1,15 +1,10 @@
 import base64
 import json
-import warnings
-from collections import OrderedDict, defaultdict
-from io import BytesIO, TextIOWrapper
-from typing import IO, Dict, Iterable, List, Optional, Set, Union
-
-import attr
-from lxml import etree
+from collections import OrderedDict
+from io import TextIOWrapper
 
 from cassis.cas import Cas, IdGenerator, Sofa, View
-from cassis.typesystem import FeatureStructure, TypeNotFoundError, TypeSystem
+from cassis.typesystem import *
 
 RESERVED_FIELD_PREFIX = "%"
 TYPE_FIELD = RESERVED_FIELD_PREFIX + "TYPE"
@@ -23,7 +18,9 @@
 REF_FEATURE_PREFIX = "@"
 NAME_FIELD = RESERVED_FIELD_PREFIX + "NAME"
 SUPER_TYPE_FIELD = RESERVED_FIELD_PREFIX + "SUPER_TYPE"
+DESCRIPTION_FIELD = RESERVED_FIELD_PREFIX + "DESCRIPTION"
 ELEMENT_TYPE_FIELD = RESERVED_FIELD_PREFIX + "ELEMENT_TYPE"
+MULTIPLE_REFERENCES_ALLOWED_FIELD = RESERVED_FIELD_PREFIX + "MULTIPLE_REFERENCES_ALLOWED"
 ID_FIELD = RESERVED_FIELD_PREFIX + "ID"
 FLAGS_FIELD = RESERVED_FIELD_PREFIX + "FLAGS"
 FLAG_DOCUMENT_ANNOTATION = "DocumentAnnotation"
@@ -58,26 +55,30 @@ def __init__(self):
         self._max_sofa_num = 0
         self._post_processors = []
 
-    def deserialize(self, source: Union[IO, str], typesystem: TypeSystem) -> Cas:
+    def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = None) -> Cas:
         if isinstance(source, str):
             data = json.loads(source)
         else:
             data = json.load(source)
 
-        feature_structures = {}
-
         self._max_xmi_id = 0
         self._max_sofa_num = 0
         self._post_processors = []
 
-        data.get(TYPES_FIELD)  # FIXME
+        embedded_typesystem = TypeSystem()
+        json_typesystem = data.get(TYPES_FIELD)
+        for type_name, json_type in json_typesystem.items():
+            self._parse_type(embedded_typesystem, type_name, json_type)
+
+        typesystem = merge_typesystems(typesystem, embedded_typesystem)
 
         cas = Cas(typesystem=typesystem)
 
+        feature_structures = {}
         json_feature_structures = data.get(FEATURE_STRUCTURES_FIELD)
         if isinstance(json_feature_structures, list):
             for json_fs in json_feature_structures:
-                if json_fs.get(TYPE_FIELD) == Cas.TYPE_NAME_SOFA:
+                if json_fs.get(TYPE_FIELD) == TYPE_NAME_SOFA:
                     fs_id = json_fs.get(ID_FIELD)
                     fs = self._parse_sofa(cas, fs_id, json_fs, feature_structures)
                 else:
@@ -87,7 +88,7 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem) -> Cas:
 
         if isinstance(json_feature_structures, dict):
             for fs_id, json_fs in json_feature_structures.items():
-                if json_fs.get(TYPE_FIELD) == Cas.TYPE_NAME_SOFA:
+                if json_fs.get(TYPE_FIELD) == TYPE_NAME_SOFA:
                     fs_id = int(fs_id)
                     fs = self._parse_sofa(cas, fs_id, json_fs, feature_structures)
                 else:
@@ -110,11 +111,28 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem) -> Cas:
 
         return cas
 
+    def _parse_type(self, typesystem: TypeSystem, type_name: str, json_type: Dict[str, any]):
+        super_type_name = json_type[SUPER_TYPE_FIELD]
+        description = json_type.get(DESCRIPTION_FIELD)
+        new_type = typesystem.create_type(type_name, super_type_name, description=description)
+
+        for key, value in json_type.items():
+            if key.startswith(RESERVED_FIELD_PREFIX):
+                continue
+            typesystem.add_feature(
+                new_type,
+                name=key,
+                rangeTypeName=json_type[RANGE_FIELD],
+                description=json_type.get(DESCRIPTION_FIELD),
+                elementType=json_type.get(ELEMENT_TYPE_FIELD),
+                multipleReferencesAllowed=json_type.get(MULTIPLE_REFERENCES_ALLOWED_FIELD),
+            )
+
     def _get_or_create_view(
         self, cas: Cas, view_name: str, fs_id: Optional[int] = None, sofa_num: Optional[int] = None
     ) -> Cas:
-        if view_name == Cas.NAME_DEFAULT_SOFA:
-            view = cas.get_view(Cas.NAME_DEFAULT_SOFA)
+        if view_name == NAME_DEFAULT_SOFA:
+            view = cas.get_view(NAME_DEFAULT_SOFA)
 
             # We need to make sure that the sofa gets the real xmi, see #155
             if fs_id is not None:
@@ -124,7 +142,7 @@ def _get_or_create_view(
         else:
             return cas.create_view(view_name, xmiID=fs_id, sofaNum=sofa_num)
 
-    def _parse_view(self, cas: Cas, view_name: str, json_view: Dict[str, any], feature_structures: List):
+    def _parse_view(self, cas: Cas, view_name: str, json_view: Dict[str, any], feature_structures: Dict[str, any]):
         view = self._get_or_create_view(cas, view_name)
         for member_id in json_view[VIEW_INDEX_FIELD]:
             fs = feature_structures[member_id]
@@ -132,13 +150,13 @@ def _parse_view(self, cas: Cas, view_name: str, json_view: Dict[str, any], featu
 
     def _parse_sofa(self, cas: Cas, fs_id: int, json_fs: Dict[str, any], feature_structures: Dict[int, any]) -> Sofa:
         view = self._get_or_create_view(
-            cas, json_fs.get(Cas.FEATURE_BASE_NAME_SOFAID), fs_id, json_fs.get(Cas.FEATURE_BASE_NAME_SOFANUM)
+            cas, json_fs.get(FEATURE_BASE_NAME_SOFAID), fs_id, json_fs.get(FEATURE_BASE_NAME_SOFANUM)
         )
 
-        view.sofa_string = json_fs.get(Cas.FEATURE_BASE_NAME_SOFASTRING)
-        view.sofa_mime = json_fs.get(Cas.FEATURE_BASE_NAME_SOFAMIME)
-        view.sofa_uri = json_fs.get(Cas.FEATURE_BASE_NAME_SOFAURI)
-        view.sofa_array = feature_structures.get(json_fs.get(REF_FEATURE_PREFIX + Cas.FEATURE_BASE_NAME_SOFAARRAY))
+        view.sofa_string = json_fs.get(FEATURE_BASE_NAME_SOFASTRING)
+        view.sofa_mime = json_fs.get(FEATURE_BASE_NAME_SOFAMIME)
+        view.sofa_uri = json_fs.get(FEATURE_BASE_NAME_SOFAURI)
+        view.sofa_array = feature_structures.get(json_fs.get(REF_FEATURE_PREFIX + FEATURE_BASE_NAME_SOFAARRAY))
 
         return view.get_sofa()
 
@@ -159,7 +177,7 @@ def _parse_feature_structure(
         if "type" in attributes:
             attributes["type_"] = attributes.pop("type")
 
-        if AnnotationType.name == Cas.TYPE_NAME_BYTE_ARRAY:
+        if AnnotationType.name == TYPE_NAME_BYTE_ARRAY:
             attributes["elements"] = base64.b64decode(attributes.get(ELEMENTS_FIELD))
 
         self._resolve_references(attributes, feature_structures)
@@ -205,6 +223,12 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
         views = data[VIEWS_FIELD] = {}
         feature_structures = data[FEATURE_STRUCTURES_FIELD] = []
 
+        for type_ in cas.typesystem.get_types():
+            if type_.name == TYPE_NAME_DOCUMENT_ANNOTATION:
+                continue
+            json_type = self._serialize_type(type_)
+            types[json_type[NAME_FIELD]] = json_type
+
         for view in cas.views:
             views[view.sofa.sofaID] = self._serialize_view(view)
             if view.sofa.sofaArray:
@@ -222,13 +246,52 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
             sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
 
         if sink:
-            json.dump(data, sink, sort_keys=False)
+            json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None)
         else:
-            json.dumps(data, sort_keys=False)
+            json.dumps(data, sort_keys=False, indent=2 if pretty_print else None)
 
         if isinstance(sink, TextIOWrapper):
             sink.detach()  # Prevent TextIOWrapper from closing the BytesIO
 
+    def _serialize_type(self, type_: Type):
+        type_name = self._to_external_type_name(type_.name)
+        supertype_name = self._to_external_type_name(type_.supertypeName)
+
+        json_type = {
+            NAME_FIELD: type_name,
+            SUPER_TYPE_FIELD: supertype_name,
+            DESCRIPTION_FIELD: type_.description,
+        }
+
+        for feature in list(type_.features):
+            json_feature = self._serialize_feature(json_type, feature)
+            json_type[json_feature[NAME_FIELD]] = json_feature
+
+        return json_type
+
+    def _serialize_feature(self, json_type, feature: Feature):
+        # If the feature name is a reserved name like `self`, then we added an
+        # underscore to it before so Python can handle it. We now need to remove it.
+        feature_name = feature.name
+        if feature._has_reserved_name:
+            feature_name = feature_name[:-1]
+
+        json_feature = {
+            NAME_FIELD: feature_name,
+            RANGE_FIELD: self._to_external_type_name(feature.rangeTypeName),
+        }
+
+        if feature.description:
+            json_feature[DESCRIPTION_FIELD] = feature.description
+
+        if feature.multipleReferencesAllowed is not None:
+            json_feature[MULTIPLE_REFERENCES_ALLOWED_FIELD] = feature.multipleReferencesAllowed
+
+        if feature.elementType is not None:
+            json_feature[ELEMENT_TYPE_FIELD] = self._to_external_type_name(feature.elementType)
+
+        return json_feature
+
     def _serialize_feature_structure(self, cas, fs) -> dict:
         json_fs = OrderedDict()
         json_fs[ID_FIELD] = fs.xmiID
@@ -256,9 +319,9 @@ def _serialize_feature_structure(self, cas, fs) -> dict:
             #    sofa: Sofa = getattr(fs, "sofa")
             #    value = sofa._offset_converter.cassis_to_uima(value)
 
-            if t.name == Cas.TYPE_NAME_BYTE_ARRAY and feature_name == "elements":
+            if t.name == TYPE_NAME_BYTE_ARRAY and feature_name == "elements":
                 json_fs[ELEMENTS_FIELD] = base64.b64encode(value).decode("ascii")
-            elif t.supertypeName == Cas.TYPE_NAME_ARRAY_BASE and feature_name == "elements":
+            elif t.supertypeName == TYPE_NAME_ARRAY_BASE and feature_name == "elements":
                 json_fs[ELEMENTS_FIELD] = value
             elif ts.is_primitive(feature.rangeTypeName):
                 json_fs[feature_name] = value
@@ -271,3 +334,8 @@ def _serialize_feature_structure(self, cas, fs) -> dict:
 
     def _serialize_view(self, view: View):
         return {VIEW_SOFA_FIELD: view.sofa.xmiID, VIEW_INDEX_FIELD: sorted(x.xmiID for x in view.get_all_annotations())}
+
+    def _to_external_type_name(self, type_name: str):
+        if type_name.startswith("uima.noNamespace."):
+            return type_name.replace("uima.noNamespace.", "")
+        return type_name
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index fabed0f..c32d379 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -13,6 +13,50 @@
 
 TOP_TYPE_NAME = "uima.cas.TOP"
 
+NAMESPACE_SEPARATOR = "."
+
+NAME_SPACE_UIMA_CAS = "uima" + NAMESPACE_SEPARATOR + "cas"
+UIMA_CAS_PREFIX = NAME_SPACE_UIMA_CAS + NAMESPACE_SEPARATOR
+TYPE_NAME_TOP = UIMA_CAS_PREFIX + "TOP"
+TYPE_NAME_INTEGER = UIMA_CAS_PREFIX + "Integer"
+TYPE_NAME_FLOAT = UIMA_CAS_PREFIX + "Float"
+TYPE_NAME_STRING = UIMA_CAS_PREFIX + "String"
+TYPE_NAME_BOOLEAN = UIMA_CAS_PREFIX + "Boolean"
+TYPE_NAME_BYTE = UIMA_CAS_PREFIX + "Byte"
+TYPE_NAME_SHORT = UIMA_CAS_PREFIX + "Short"
+TYPE_NAME_LONG = UIMA_CAS_PREFIX + "Long"
+TYPE_NAME_DOUBLE = UIMA_CAS_PREFIX + "Double"
+TYPE_NAME_ARRAY_BASE = UIMA_CAS_PREFIX + "ArrayBase"
+TYPE_NAME_FS_ARRAY = UIMA_CAS_PREFIX + "FSArray"
+TYPE_NAME_INTEGER_ARRAY = UIMA_CAS_PREFIX + "IntegerArray"
+TYPE_NAME_FLOAT_ARRAY = UIMA_CAS_PREFIX + "FloatArray"
+TYPE_NAME_STRING_ARRAY = UIMA_CAS_PREFIX + "StringArray"
+TYPE_NAME_BOOLEAN_ARRAY = UIMA_CAS_PREFIX + "BooleanArray"
+TYPE_NAME_BYTE_ARRAY = UIMA_CAS_PREFIX + "ByteArray"
+TYPE_NAME_SHORT_ARRAY = UIMA_CAS_PREFIX + "ShortArray"
+TYPE_NAME_LONG_ARRAY = UIMA_CAS_PREFIX + "LongArray"
+TYPE_NAME_DOUBLE_ARRAY = UIMA_CAS_PREFIX + "DoubleArray"
+TYPE_NAME_FS_HASH_SET = UIMA_CAS_PREFIX + "FSHashSet"
+TYPE_NAME_ANNOTATION_BASE = UIMA_CAS_PREFIX + "AnnotationBase"
+
+NAME_DEFAULT_SOFA = "_InitialView"
+TYPE_NAME_SOFA = UIMA_CAS_PREFIX + "Sofa"
+FEATURE_BASE_NAME_SOFANUM = "sofaNum"
+FEATURE_BASE_NAME_SOFAID = "sofaID"
+FEATURE_BASE_NAME_SOFAMIME = "mimeType"
+FEATURE_BASE_NAME_SOFAURI = "sofaURI"
+FEATURE_BASE_NAME_SOFASTRING = "sofaString"
+FEATURE_BASE_NAME_SOFAARRAY = "sofaArray"
+
+NAME_SPACE_UIMA_TCAS = "uima" + NAMESPACE_SEPARATOR + "tcas"
+UIMA_TCAS_PREFIX = NAME_SPACE_UIMA_TCAS + NAMESPACE_SEPARATOR
+TYPE_NAME_ANNOTATION = UIMA_TCAS_PREFIX + "Annotation"
+TYPE_NAME_DOCUMENT_ANNOTATION = UIMA_TCAS_PREFIX + "DocumentAnnotation"
+FEATURE_BASE_NAME_SOFA = "sofa"
+FEATURE_BASE_NAME_BEGIN = "begin"
+FEATURE_BASE_NAME_END = "end"
+FEATURE_BASE_NAME_LANGUAGE = "language"
+
 _DOCUMENT_ANNOTATION_TYPE = "uima.tcas.DocumentAnnotation"
 
 _PREDEFINED_TYPES = {
@@ -349,8 +393,6 @@ def descendants(self) -> Iterator["Type"]:
 
 
 class TypeSystem:
-    NAMESPACE_SEPARATOR = "."
-
     def __init__(self, add_document_annotation_type: bool = True):
         self._types = {}
 
@@ -426,7 +468,12 @@ def __init__(self, add_document_annotation_type: bool = True):
         self.add_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer")
         self.add_feature(t, name="sofaID", rangeTypeName="uima.cas.String")
         self.add_feature(t, name="mimeType", rangeTypeName="uima.cas.String")
-        self.add_feature(t, name="sofaArray", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
+        self.add_feature(
+            t,
+            name="sofaArray",
+            rangeTypeName="uima.cas.TOP",
+            multipleReferencesAllowed=True,
+        )
         self.add_feature(t, name="sofaString", rangeTypeName="uima.cas.String")
         self.add_feature(t, name="sofaURI", rangeTypeName="uima.cas.String")
 
diff --git a/tests/test_files/test_cas_generators.py b/tests/test_files/test_cas_generators.py
new file mode 100644
index 0000000..25bb4da
--- /dev/null
+++ b/tests/test_files/test_cas_generators.py
@@ -0,0 +1,42 @@
+from random import Random
+
+from cassis import Cas, TypeSystem
+from cassis.typesystem import *
+
+
+class MultiTypeRandomCasGenerator:
+    def __init__(self):
+        self.type_count = 10
+        self.size = 10
+        self.minimum_width = 0
+        self.rnd = Random()
+
+    def generate_type_system(self) -> TypeSystem:
+        typesystem = TypeSystem()
+        types = []
+
+        for ti in range(0, self.type_count):
+            type_name = f"test.Type{ti + 1}"
+            if self.rnd.randint(0, 1) == 0 or not types:
+                typesystem.create_type(type_name, TYPE_NAME_ANNOTATION)
+            else:
+                typesystem.create_type(type_name, self.rnd.choice(types))
+            types.append(type_name)
+
+        return typesystem
+
+    def generate_cas(self, typesystem: TypeSystem) -> Cas:
+        cas = Cas(typesystem)
+
+        types = [t for t in typesystem.get_types()]
+        types.remove(cas.typesystem.get_type(TYPE_NAME_DOCUMENT_ANNOTATION))
+        self.rnd.shuffle(types)
+
+        for n in range(0, self.size):
+            for T in types:
+                begin = self.rnd.randint(0, 100)
+                end = self.rnd.randint(0, 30) + self.minimum_width
+                fs = T(begin=begin, end=end)
+                cas.add_annotation(fs)
+
+        return cas
diff --git a/tests/test_json.py b/tests/test_json.py
index a7aaaab..4633fd9 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -1,6 +1,7 @@
 import json
 
 from tests.fixtures import *
+from tests.test_files.test_cas_generators import MultiTypeRandomCasGenerator
 from tests.util import assert_json_equal
 
 FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files")
@@ -24,3 +25,18 @@ def test_deserialization_serialization(json_path):
     actual_json = cas.to_json()
 
     assert_json_equal(actual_json, expected_json)
+
+
+def test_serialization_deserialization():
+    generator = MultiTypeRandomCasGenerator()
+    for i in range(0, 10):
+        generator.size = (i + 1) * 10
+        generator.type_count = i + 1
+        typesystem = generator.generate_type_system()
+        randomized_cas = generator.generate_cas(typesystem)
+        expected_json = randomized_cas.to_json(pretty_print=True)
+
+        loaded_cas = load_cas_from_json(expected_json)
+        actual_json = loaded_cas.to_json()
+
+        assert_json_equal(actual_json, expected_json)
diff --git a/tests/util.py b/tests/util.py
index de1d26b..129cc84 100644
--- a/tests/util.py
+++ b/tests/util.py
@@ -50,7 +50,13 @@ def assert_json_equal(actual: str, expected: Union[IO, str]):
     Throws:
         AssertionError when json(actual) != json(expected)
     """
-    actual_json = json.dumps(json.loads(actual), sort_keys=True, indent=2)
+    if isinstance(actual, str):
+        actual = json.loads(actual)
+
+    if isinstance(expected, str):
+        expected = json.loads(expected)
+
+    actual_json = json.dumps(actual, sort_keys=True, indent=2)
     expected_json = json.dumps(expected, sort_keys=True, indent=2)
 
     try:

From 4f462be7841eb1a3c316f7bdc4f1ca0cde52d148 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 17:34:49 +0200
Subject: [PATCH 06/22] #168 - Experimental JSON CAS support

- Added another generator for random CASes
- Added more tests
- Commented out all testing of arrays in the new generator since array handling in cassis seems to have a few conceptual problems which need to be looked at first
---
 cassis/cas.py                           |   2 +
 cassis/json.py                          |  47 ++++++-----
 cassis/typesystem.py                    |   1 -
 tests/test_files/test_cas_generators.py | 102 ++++++++++++++++++++++++
 tests/test_json.py                      |  23 +++++-
 5 files changed, 150 insertions(+), 25 deletions(-)

diff --git a/cassis/cas.py b/cassis/cas.py
index ca74543..257bcbd 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -13,6 +13,8 @@
 
 _validator_optional_string = validators.optional(validators.instance_of(str))
 
+NAME_DEFAULT_SOFA = "_InitialView"
+
 
 class IdGenerator:
     def __init__(self, initial_id: int = 1):
diff --git a/cassis/json.py b/cassis/json.py
index 34cac0e..14b2e5d 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -3,7 +3,7 @@
 from collections import OrderedDict
 from io import TextIOWrapper
 
-from cassis.cas import Cas, IdGenerator, Sofa, View
+from cassis.cas import NAME_DEFAULT_SOFA, Cas, IdGenerator, Sofa, View
 from cassis.typesystem import *
 
 RESERVED_FIELD_PREFIX = "%"
@@ -116,16 +116,16 @@ def _parse_type(self, typesystem: TypeSystem, type_name: str, json_type: Dict[st
         description = json_type.get(DESCRIPTION_FIELD)
         new_type = typesystem.create_type(type_name, super_type_name, description=description)
 
-        for key, value in json_type.items():
+        for key, json_feature in json_type.items():
             if key.startswith(RESERVED_FIELD_PREFIX):
                 continue
             typesystem.add_feature(
                 new_type,
                 name=key,
-                rangeTypeName=json_type[RANGE_FIELD],
-                description=json_type.get(DESCRIPTION_FIELD),
-                elementType=json_type.get(ELEMENT_TYPE_FIELD),
-                multipleReferencesAllowed=json_type.get(MULTIPLE_REFERENCES_ALLOWED_FIELD),
+                rangeTypeName=json_feature[RANGE_FIELD],
+                description=json_feature.get(DESCRIPTION_FIELD),
+                elementType=json_feature.get(ELEMENT_TYPE_FIELD),
+                multipleReferencesAllowed=json_feature.get(MULTIPLE_REFERENCES_ALLOWED_FIELD),
             )
 
     def _get_or_create_view(
@@ -180,27 +180,32 @@ def _parse_feature_structure(
         if AnnotationType.name == TYPE_NAME_BYTE_ARRAY:
             attributes["elements"] = base64.b64decode(attributes.get(ELEMENTS_FIELD))
 
-        self._resolve_references(attributes, feature_structures)
         self._strip_reserved_json_keys(attributes)
 
-        self._max_xmi_id = max(attributes["xmiID"], self._max_xmi_id)
-        return AnnotationType(**attributes)
-
-    def _resolve_references(self, attributes: Dict[str, any], feature_structures: Dict[int, any]):
+        ref_features = {}
         for key, value in list(attributes.items()):
             if key.startswith(REF_FEATURE_PREFIX):
+                ref_features[key[1:]] = value
                 attributes.pop(key)
-                feature_name = key[1:]
-                target_fs = feature_structures.get(value)
-                if target_fs:
-                    # Resolve id-ref now
-                    attributes[feature_name] = target_fs
-                else:
-                    # Resolve id-ref at the end of processing
-                    def fix_up():
-                        attributes[feature_name] = feature_structures.get(value)
 
-                    self._post_processors.append(fix_up)
+        self._max_xmi_id = max(attributes["xmiID"], self._max_xmi_id)
+        fs = AnnotationType(**attributes)
+
+        self._resolve_references(fs, ref_features, feature_structures)
+        return fs
+
+    def _resolve_references(self, fs, ref_features: Dict[str, any], feature_structures: Dict[int, any]):
+        for key, value in ref_features.items():
+            target_fs = feature_structures.get(value)
+            if target_fs:
+                # Resolve id-ref now
+                setattr(fs, key, target_fs)
+            else:
+                # Resolve id-ref at the end of processing
+                def fix_up():
+                    setattr(fs, key, feature_structures.get(value))
+
+                self._post_processors.append(fix_up)
 
     def _strip_reserved_json_keys(
         self,
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index c32d379..92898de 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -39,7 +39,6 @@
 TYPE_NAME_FS_HASH_SET = UIMA_CAS_PREFIX + "FSHashSet"
 TYPE_NAME_ANNOTATION_BASE = UIMA_CAS_PREFIX + "AnnotationBase"
 
-NAME_DEFAULT_SOFA = "_InitialView"
 TYPE_NAME_SOFA = UIMA_CAS_PREFIX + "Sofa"
 FEATURE_BASE_NAME_SOFANUM = "sofaNum"
 FEATURE_BASE_NAME_SOFAID = "sofaID"
diff --git a/tests/test_files/test_cas_generators.py b/tests/test_files/test_cas_generators.py
index 25bb4da..3284da6 100644
--- a/tests/test_files/test_cas_generators.py
+++ b/tests/test_files/test_cas_generators.py
@@ -40,3 +40,105 @@ def generate_cas(self, typesystem: TypeSystem) -> Cas:
                 cas.add_annotation(fs)
 
         return cas
+
+
+class MultiFeatureRandomCasGenerator:
+    STRING_VALUES = ["abc", "abcdef", None, "", "ghijklm", "a", "b"]
+    BYTE_VALUES = [1, 0, -1, 127, -128, 9, -9]
+    LONG_VALUES = [1, 0, -1, 9223372036854775807, -9223372036854775808, 11, -11]
+    SHORT_VALUES = [1, 0, -1, 32767, -32768, 22, -22]
+    DOUBLE_VALUES = [1, 0, -1, 999999999999, -999999999999, 33, -33.33]
+    FLOAT_VALUES = [1, 0, -1, 999999999999, -999999999999, 17, -22.33]
+    BOOL_VALUES = [True, False]
+
+    def __init__(self):
+        self.size = 10
+        self.rnd = Random()
+
+    def generate_type_system(self) -> TypeSystem:
+        typesystem = TypeSystem()
+        Akof = typesystem.create_type("akof", TYPE_NAME_TOP, "all kinds of features")
+        typesystem.add_feature(Akof, "akofInt", TYPE_NAME_INTEGER)
+        typesystem.add_feature(Akof, "akofFs", TYPE_NAME_TOP)
+        typesystem.add_feature(Akof, "akofFloat", TYPE_NAME_FLOAT)
+        typesystem.add_feature(Akof, "akofDouble", TYPE_NAME_DOUBLE)
+        typesystem.add_feature(Akof, "akofLong", TYPE_NAME_LONG)
+        typesystem.add_feature(Akof, "akofShort", TYPE_NAME_SHORT)
+        typesystem.add_feature(Akof, "akofByte", TYPE_NAME_BYTE)
+        typesystem.add_feature(Akof, "akofBoolean", TYPE_NAME_BOOLEAN)
+        typesystem.add_feature(Akof, "akofString", TYPE_NAME_STRING)
+        # typesystem.add_feature(Akof, "akofAInt", TYPE_NAME_INTEGER_ARRAY)
+        # typesystem.add_feature(Akof, "akofAFs", TYPE_NAME_FS_ARRAY)
+        # typesystem.add_feature(Akof, "akofAFloat", TYPE_NAME_FLOAT_ARRAY)
+        # typesystem.add_feature(Akof, "akofADouble", TYPE_NAME_DOUBLE_ARRAY)
+        # typesystem.add_feature(Akof, "akofALong", TYPE_NAME_LONG_ARRAY)
+        # typesystem.add_feature(Akof, "akofAShort", TYPE_NAME_SHORT_ARRAY)
+        # typesystem.add_feature(Akof, "akofAByte", TYPE_NAME_BYTE_ARRAY)
+        # typesystem.add_feature(Akof, "akofABoolean", TYPE_NAME_BOOLEAN_ARRAY)
+        # typesystem.add_feature(Akof, "akofAString", TYPE_NAME_STRING_ARRAY)
+        return typesystem
+
+    def generate_cas(self, typesystem: TypeSystem) -> Cas:
+        feature_structures = []
+
+        cas = Cas(typesystem)
+
+        for i in range(0, self.size):
+            feature_structures.append(self._makeAkof(cas))
+
+        # Randomly link feature structures to each other
+#        FSArray = cas.typesystem.get_type(TYPE_NAME_FS_ARRAY)
+        for fs in feature_structures:
+            fs.akofFs = self.rnd.choice(feature_structures)
+#            fs.akofAFs = FSArray(
+#                elements=[self.rnd.choice(feature_structures) for i in range(0, self.rnd.randint(1, 3))]
+#            )
+
+        cas.add_annotations(feature_structures)
+
+        return cas
+
+    def _makeAkof(self, cas: Cas) -> Any:
+        Akof = cas.typesystem.get_type("akof")
+        # IntegerArray = cas.typesystem.get_type(TYPE_NAME_INTEGER_ARRAY)
+        # FloatArray = cas.typesystem.get_type(TYPE_NAME_FLOAT_ARRAY)
+        # DoubleArray = cas.typesystem.get_type(TYPE_NAME_DOUBLE_ARRAY)
+        # LongArray = cas.typesystem.get_type(TYPE_NAME_LONG_ARRAY)
+        # ShortArray = cas.typesystem.get_type(TYPE_NAME_SHORT_ARRAY)
+        # ByteArray = cas.typesystem.get_type(TYPE_NAME_BYTE_ARRAY)
+        # BooleanArray = cas.typesystem.get_type(TYPE_NAME_BOOLEAN_ARRAY)
+        # StringArray = cas.typesystem.get_type(TYPE_NAME_STRING_ARRAY)
+        akof = Akof()
+        akof.akofInt = self.rnd.randint(-2147483648, 2147483647)
+        akof.akofFloat = self.rnd.choice(self.FLOAT_VALUES)
+        akof.akofDouble = self.rnd.choice(self.DOUBLE_VALUES)
+        akof.akofLong = self.rnd.choice(self.LONG_VALUES)
+        akof.akofShort = self.rnd.choice(self.SHORT_VALUES)
+        akof.akofByte = self.rnd.choice(self.BYTE_VALUES)
+        akof.akofBoolean = self.rnd.choice(self.BOOL_VALUES)
+        akof.akofString = self.rnd.choice(self.STRING_VALUES)
+        # akof.akofAInt = IntegerArray(
+        #     elements=[self.rnd.randint(-2147483648, 2147483647) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofAFloat = FloatArray(
+        #     elements=[self.rnd.choice(self.FLOAT_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofADouble = DoubleArray(
+        #     elements=[self.rnd.choice(self.DOUBLE_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofALong = LongArray(
+        #     elements=[self.rnd.choice(self.LONG_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofAShort = ShortArray(
+        #     elements=[self.rnd.choice(self.SHORT_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofAByte = ByteArray(
+        #     elements=[self.rnd.choice(self.BYTE_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofABoolean = BooleanArray(
+        #     elements=[self.rnd.choice(self.BOOL_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        # akof.akofAString = StringArray(
+        #     elements=[self.rnd.choice(self.STRING_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        # )
+        return akof
diff --git a/tests/test_json.py b/tests/test_json.py
index 4633fd9..4b10495 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -1,7 +1,8 @@
 import json
 
+from cassis.cas import NAME_DEFAULT_SOFA
 from tests.fixtures import *
-from tests.test_files.test_cas_generators import MultiTypeRandomCasGenerator
+from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
 from tests.util import assert_json_equal
 
 FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files")
@@ -27,14 +28,30 @@ def test_deserialization_serialization(json_path):
     assert_json_equal(actual_json, expected_json)
 
 
-def test_serialization_deserialization():
+def test_multi_type_random_serialization_deserialization():
     generator = MultiTypeRandomCasGenerator()
     for i in range(0, 10):
         generator.size = (i + 1) * 10
         generator.type_count = i + 1
         typesystem = generator.generate_type_system()
         randomized_cas = generator.generate_cas(typesystem)
-        expected_json = randomized_cas.to_json(pretty_print=True)
+        print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}")
+        expected_json = randomized_cas.to_json()
+
+        loaded_cas = load_cas_from_json(expected_json)
+        actual_json = loaded_cas.to_json()
+
+        assert_json_equal(actual_json, expected_json)
+
+
+def test_multi_feature_random_serialization_deserialization():
+    generator = MultiFeatureRandomCasGenerator()
+    for i in range(0, 10):
+        generator.size = (i + 1) * 10
+        typesystem = generator.generate_type_system()
+        randomized_cas = generator.generate_cas(typesystem)
+        print(f"CAS size: {sum(len(view.get_all_annotations()) for view in randomized_cas.views)}")
+        expected_json = randomized_cas.to_json()
 
         loaded_cas = load_cas_from_json(expected_json)
         actual_json = loaded_cas.to_json()

From cbf086ecf1ab02ebfbd6b2338e5133529c4e3a34 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 13 Aug 2021 18:19:36 +0200
Subject: [PATCH 07/22] #168 - Experimental JSON CAS support

- Revert change to stripping the null FS
- Changed reference data so that IDs start at 1 and not at 0, leaving 0 reserved for the null FS
---
 cassis/cas.py                                      |  5 +----
 .../ser-ref/casWithSofaDataArray/data.json         |  8 ++++----
 .../ser-ref/casWithSofaDataURI/data.json           |  4 ++--
 .../json/fs_as_array/ser-ref/casWithText/data.json | 10 +++++-----
 .../ser-ref/casWithTextAndAnnotation/data.json     | 14 +++++++-------
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/cassis/cas.py b/cassis/cas.py
index 257bcbd..999771d 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -646,10 +646,7 @@ def _find_all_fs(self) -> Iterable[FeatureStructure]:
                         openlist.append(referenced_fs)
 
         # We do not want to return cas:NULL here as we handle serializing it later
-        for fs_id, fs in list(all_fs.items()):
-            if fs.type == "uima.cas.NULL":
-                all_fs.pop(fs_id)
-
+        all_fs.pop(0, None)
         yield from all_fs.values()
 
     def _get_next_xmi_id(self) -> int:
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
index b732eaf..054d442 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
@@ -2,20 +2,20 @@
   "%TYPES" : { },
   "%VIEWS" : {
     "_InitialView" : {
-      "%SOFA" : 0,
+      "%SOFA" : 1,
       "%INDEX" : [ ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
-    "%ID" : 1,
+    "%ID" : 2,
     "%TYPE" : "uima.cas.ByteArray",
     "%ELEMENTS" : "VGhpcyBpcyBhIHRlc3Q="
   }, {
-    "%ID" : 0,
+    "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text/plain",
-    "@sofaArray" : 1
+    "@sofaArray" : 2
   } ]
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
index 678d9e2..9375241 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
@@ -2,12 +2,12 @@
   "%TYPES" : { },
   "%VIEWS" : {
     "_InitialView" : {
-      "%SOFA" : 0,
+      "%SOFA" : 1,
       "%INDEX" : [ ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
-    "%ID" : 0,
+    "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
index 416e6e1..c8fd81a 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
@@ -2,21 +2,21 @@
   "%TYPES" : { },
   "%VIEWS" : {
     "_InitialView" : {
-      "%SOFA" : 0,
-      "%INDEX" : [ 1 ]
+      "%SOFA" : 1,
+      "%INDEX" : [ 2 ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
-    "%ID" : 0,
+    "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text",
     "sofaString" : "This is a test."
   }, {
-    "%ID" : 1,
+    "%ID" : 2,
     "%TYPE" : "uima.tcas.DocumentAnnotation",
-    "@sofa" : 0,
+    "@sofa" : 1,
     "begin" : 0,
     "end" : 15,
     "language" : "x-unspecified"
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
index aa71704..8debb1f 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
@@ -2,28 +2,28 @@
   "%TYPES" : { },
   "%VIEWS" : {
     "_InitialView" : {
-      "%SOFA" : 0,
-      "%INDEX" : [ 1, 2 ]
+      "%SOFA" : 1,
+      "%INDEX" : [ 2, 3 ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
-    "%ID" : 0,
+    "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text",
     "sofaString" : "This is a test."
   }, {
-    "%ID" : 1,
+    "%ID" : 2,
     "%TYPE" : "uima.tcas.DocumentAnnotation",
-    "@sofa" : 0,
+    "@sofa" : 1,
     "begin" : 0,
     "end" : 15,
     "language" : "x-unspecified"
   }, {
-    "%ID" : 2,
+    "%ID" : 3,
     "%TYPE" : "uima.tcas.Annotation",
-    "@sofa" : 0,
+    "@sofa" : 1,
     "begin" : 0,
     "end" : 15
   } ]

From 3b50a8e6e3839f4ead3b06d04b05fe8220845113 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Thu, 19 Aug 2021 12:31:47 +0200
Subject: [PATCH 08/22] #168 - Experimental JSON CAS support

- Fix array support
- Enable array tests
---
 cassis/json.py                          |  57 ++++++++----
 tests/test_files/test_cas_generators.py | 112 ++++++++++++------------
 tests/test_json.py                      |   2 +-
 tests/util.py                           |   6 +-
 4 files changed, 103 insertions(+), 74 deletions(-)

diff --git a/cassis/json.py b/cassis/json.py
index 14b2e5d..bd7655f 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -119,7 +119,7 @@ def _parse_type(self, typesystem: TypeSystem, type_name: str, json_type: Dict[st
         for key, json_feature in json_type.items():
             if key.startswith(RESERVED_FIELD_PREFIX):
                 continue
-            typesystem.add_feature(
+            typesystem.create_feature(
                 new_type,
                 name=key,
                 rangeTypeName=json_feature[RANGE_FIELD],
@@ -177,8 +177,14 @@ def _parse_feature_structure(
         if "type" in attributes:
             attributes["type_"] = attributes.pop("type")
 
-        if AnnotationType.name == TYPE_NAME_BYTE_ARRAY:
-            attributes["elements"] = base64.b64decode(attributes.get(ELEMENTS_FIELD))
+        if typesystem.is_primitive_array(AnnotationType.name):
+            attributes["elements"] = self._parse_primitive_array(AnnotationType.name, json_fs.get(ELEMENTS_FIELD))
+        elif AnnotationType.name == TYPE_NAME_FS_ARRAY:
+            # Resolve id-ref at the end of processing
+            def fix_up(elements):
+                return lambda: setattr(fs, "elements", [feature_structures.get(e) for e in elements])
+
+            self._post_processors.append(fix_up(json_fs.get(ELEMENTS_FIELD)))
 
         self._strip_reserved_json_keys(attributes)
 
@@ -194,6 +200,12 @@ def _parse_feature_structure(
         self._resolve_references(fs, ref_features, feature_structures)
         return fs
 
+    def _parse_primitive_array(self, type_name: str, elements: [list, str]) -> List:
+        if type_name == TYPE_NAME_BYTE_ARRAY:
+            return base64.b64decode(elements)
+        else:
+            return elements
+
     def _resolve_references(self, fs, ref_features: Dict[str, any], feature_structures: Dict[int, any]):
         for key, value in ref_features.items():
             target_fs = feature_structures.get(value)
@@ -202,10 +214,10 @@ def _resolve_references(self, fs, ref_features: Dict[str, any], feature_structur
                 setattr(fs, key, target_fs)
             else:
                 # Resolve id-ref at the end of processing
-                def fix_up():
-                    setattr(fs, key, feature_structures.get(value))
+                def fix_up(k, v):
+                    return lambda: setattr(fs, k, feature_structures.get(v))
 
-                self._post_processors.append(fix_up)
+                self._post_processors.append(fix_up(key, value))
 
     def _strip_reserved_json_keys(
         self,
@@ -243,7 +255,7 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
             feature_structures.append(json_sofa_fs)
 
         # Find all fs, even the ones that are not directly added to a sofa
-        for fs in sorted(cas._find_all_fs(), key=lambda a: a.xmiID):
+        for fs in sorted(cas._find_all_fs(include_inlinable_arrays=True), key=lambda a: a.xmiID):
             json_fs = self._serialize_feature_structure(cas, fs)
             feature_structures.append(json_fs)
 
@@ -304,6 +316,20 @@ def _serialize_feature_structure(self, cas, fs) -> dict:
 
         ts = cas.typesystem
         t = ts.get_type(fs.type)
+
+        if t.name == TYPE_NAME_BYTE_ARRAY:
+            if fs.elements:
+                json_fs[ELEMENTS_FIELD] = base64.b64encode(bytes(fs.elements)).decode("ascii")
+            return json_fs
+        elif ts.is_primitive_array(t.name):
+            if fs.elements:
+                json_fs[ELEMENTS_FIELD] = fs.elements
+            return json_fs
+        elif TYPE_NAME_FS_ARRAY == t.name:
+            if fs.elements:
+                json_fs[ELEMENTS_FIELD] = [self._serialize_ref(e) for e in fs.elements]
+            return json_fs
+
         for feature in t.all_features:
             if feature.name in CasJsonSerializer._COMMON_FIELD_NAMES:
                 continue
@@ -324,19 +350,20 @@ def _serialize_feature_structure(self, cas, fs) -> dict:
             #    sofa: Sofa = getattr(fs, "sofa")
             #    value = sofa._offset_converter.cassis_to_uima(value)
 
-            if t.name == TYPE_NAME_BYTE_ARRAY and feature_name == "elements":
-                json_fs[ELEMENTS_FIELD] = base64.b64encode(value).decode("ascii")
-            elif t.supertypeName == TYPE_NAME_ARRAY_BASE and feature_name == "elements":
-                json_fs[ELEMENTS_FIELD] = value
-            elif ts.is_primitive(feature.rangeTypeName):
+            if ts.is_primitive(feature.rangeTypeName):
                 json_fs[feature_name] = value
-            elif ts.is_collection(fs.type, feature):
-                json_fs[REF_FEATURE_PREFIX + feature_name] = value.xmiID
             else:
                 # We need to encode non-primitive features as a reference
-                json_fs[REF_FEATURE_PREFIX + feature_name] = value.xmiID
+                json_fs[REF_FEATURE_PREFIX + feature_name] = self._serialize_ref(value)
         return json_fs
 
+    def _serialize_ref(self, fs) -> int:
+        if not fs:
+            return None
+
+        return fs.xmiID
+
+
     def _serialize_view(self, view: View):
         return {VIEW_SOFA_FIELD: view.sofa.xmiID, VIEW_INDEX_FIELD: sorted(x.xmiID for x in view.get_all_annotations())}
 
diff --git a/tests/test_files/test_cas_generators.py b/tests/test_files/test_cas_generators.py
index 3284da6..2cd6e03 100644
--- a/tests/test_files/test_cas_generators.py
+++ b/tests/test_files/test_cas_generators.py
@@ -44,7 +44,9 @@ def generate_cas(self, typesystem: TypeSystem) -> Cas:
 
 class MultiFeatureRandomCasGenerator:
     STRING_VALUES = ["abc", "abcdef", None, "", "ghijklm", "a", "b"]
-    BYTE_VALUES = [1, 0, -1, 127, -128, 9, -9]
+    # In Java, bytes go from -128 to 127, in Python from 0 to 255.
+    # BYTE_VALUES = [1, 0, -1, 127, -128, 9, -9]
+    BYTE_VALUES = [1, 0, 255, 0, 9]
     LONG_VALUES = [1, 0, -1, 9223372036854775807, -9223372036854775808, 11, -11]
     SHORT_VALUES = [1, 0, -1, 32767, -32768, 22, -22]
     DOUBLE_VALUES = [1, 0, -1, 999999999999, -999999999999, 33, -33.33]
@@ -58,24 +60,24 @@ def __init__(self):
     def generate_type_system(self) -> TypeSystem:
         typesystem = TypeSystem()
         Akof = typesystem.create_type("akof", TYPE_NAME_TOP, "all kinds of features")
-        typesystem.add_feature(Akof, "akofInt", TYPE_NAME_INTEGER)
-        typesystem.add_feature(Akof, "akofFs", TYPE_NAME_TOP)
-        typesystem.add_feature(Akof, "akofFloat", TYPE_NAME_FLOAT)
-        typesystem.add_feature(Akof, "akofDouble", TYPE_NAME_DOUBLE)
-        typesystem.add_feature(Akof, "akofLong", TYPE_NAME_LONG)
-        typesystem.add_feature(Akof, "akofShort", TYPE_NAME_SHORT)
-        typesystem.add_feature(Akof, "akofByte", TYPE_NAME_BYTE)
-        typesystem.add_feature(Akof, "akofBoolean", TYPE_NAME_BOOLEAN)
-        typesystem.add_feature(Akof, "akofString", TYPE_NAME_STRING)
-        # typesystem.add_feature(Akof, "akofAInt", TYPE_NAME_INTEGER_ARRAY)
-        # typesystem.add_feature(Akof, "akofAFs", TYPE_NAME_FS_ARRAY)
-        # typesystem.add_feature(Akof, "akofAFloat", TYPE_NAME_FLOAT_ARRAY)
-        # typesystem.add_feature(Akof, "akofADouble", TYPE_NAME_DOUBLE_ARRAY)
-        # typesystem.add_feature(Akof, "akofALong", TYPE_NAME_LONG_ARRAY)
-        # typesystem.add_feature(Akof, "akofAShort", TYPE_NAME_SHORT_ARRAY)
-        # typesystem.add_feature(Akof, "akofAByte", TYPE_NAME_BYTE_ARRAY)
-        # typesystem.add_feature(Akof, "akofABoolean", TYPE_NAME_BOOLEAN_ARRAY)
-        # typesystem.add_feature(Akof, "akofAString", TYPE_NAME_STRING_ARRAY)
+        typesystem.create_feature(Akof, "akofInt", TYPE_NAME_INTEGER)
+        typesystem.create_feature(Akof, "akofFs", TYPE_NAME_TOP)
+        typesystem.create_feature(Akof, "akofFloat", TYPE_NAME_FLOAT)
+        typesystem.create_feature(Akof, "akofDouble", TYPE_NAME_DOUBLE)
+        typesystem.create_feature(Akof, "akofLong", TYPE_NAME_LONG)
+        typesystem.create_feature(Akof, "akofShort", TYPE_NAME_SHORT)
+        typesystem.create_feature(Akof, "akofByte", TYPE_NAME_BYTE)
+        typesystem.create_feature(Akof, "akofBoolean", TYPE_NAME_BOOLEAN)
+        typesystem.create_feature(Akof, "akofString", TYPE_NAME_STRING)
+        typesystem.create_feature(Akof, "akofAInt", TYPE_NAME_INTEGER_ARRAY)
+        typesystem.create_feature(Akof, "akofAFs", TYPE_NAME_FS_ARRAY)
+        typesystem.create_feature(Akof, "akofAFloat", TYPE_NAME_FLOAT_ARRAY)
+        typesystem.create_feature(Akof, "akofADouble", TYPE_NAME_DOUBLE_ARRAY)
+        typesystem.create_feature(Akof, "akofALong", TYPE_NAME_LONG_ARRAY)
+        typesystem.create_feature(Akof, "akofAShort", TYPE_NAME_SHORT_ARRAY)
+        typesystem.create_feature(Akof, "akofAByte", TYPE_NAME_BYTE_ARRAY)
+        typesystem.create_feature(Akof, "akofABoolean", TYPE_NAME_BOOLEAN_ARRAY)
+        typesystem.create_feature(Akof, "akofAString", TYPE_NAME_STRING_ARRAY)
         return typesystem
 
     def generate_cas(self, typesystem: TypeSystem) -> Cas:
@@ -87,12 +89,12 @@ def generate_cas(self, typesystem: TypeSystem) -> Cas:
             feature_structures.append(self._makeAkof(cas))
 
         # Randomly link feature structures to each other
-#        FSArray = cas.typesystem.get_type(TYPE_NAME_FS_ARRAY)
+        FSArray = cas.typesystem.get_type(TYPE_NAME_FS_ARRAY)
         for fs in feature_structures:
             fs.akofFs = self.rnd.choice(feature_structures)
-#            fs.akofAFs = FSArray(
-#                elements=[self.rnd.choice(feature_structures) for i in range(0, self.rnd.randint(1, 3))]
-#            )
+            fs.akofAFs = FSArray(
+                elements=[self.rnd.choice(feature_structures) for i in range(0, self.rnd.randint(1, 3))]
+            )
 
         cas.add_annotations(feature_structures)
 
@@ -100,14 +102,14 @@ def generate_cas(self, typesystem: TypeSystem) -> Cas:
 
     def _makeAkof(self, cas: Cas) -> Any:
         Akof = cas.typesystem.get_type("akof")
-        # IntegerArray = cas.typesystem.get_type(TYPE_NAME_INTEGER_ARRAY)
-        # FloatArray = cas.typesystem.get_type(TYPE_NAME_FLOAT_ARRAY)
-        # DoubleArray = cas.typesystem.get_type(TYPE_NAME_DOUBLE_ARRAY)
-        # LongArray = cas.typesystem.get_type(TYPE_NAME_LONG_ARRAY)
-        # ShortArray = cas.typesystem.get_type(TYPE_NAME_SHORT_ARRAY)
-        # ByteArray = cas.typesystem.get_type(TYPE_NAME_BYTE_ARRAY)
-        # BooleanArray = cas.typesystem.get_type(TYPE_NAME_BOOLEAN_ARRAY)
-        # StringArray = cas.typesystem.get_type(TYPE_NAME_STRING_ARRAY)
+        IntegerArray = cas.typesystem.get_type(TYPE_NAME_INTEGER_ARRAY)
+        FloatArray = cas.typesystem.get_type(TYPE_NAME_FLOAT_ARRAY)
+        DoubleArray = cas.typesystem.get_type(TYPE_NAME_DOUBLE_ARRAY)
+        LongArray = cas.typesystem.get_type(TYPE_NAME_LONG_ARRAY)
+        ShortArray = cas.typesystem.get_type(TYPE_NAME_SHORT_ARRAY)
+        ByteArray = cas.typesystem.get_type(TYPE_NAME_BYTE_ARRAY)
+        BooleanArray = cas.typesystem.get_type(TYPE_NAME_BOOLEAN_ARRAY)
+        StringArray = cas.typesystem.get_type(TYPE_NAME_STRING_ARRAY)
         akof = Akof()
         akof.akofInt = self.rnd.randint(-2147483648, 2147483647)
         akof.akofFloat = self.rnd.choice(self.FLOAT_VALUES)
@@ -117,28 +119,28 @@ def _makeAkof(self, cas: Cas) -> Any:
         akof.akofByte = self.rnd.choice(self.BYTE_VALUES)
         akof.akofBoolean = self.rnd.choice(self.BOOL_VALUES)
         akof.akofString = self.rnd.choice(self.STRING_VALUES)
-        # akof.akofAInt = IntegerArray(
-        #     elements=[self.rnd.randint(-2147483648, 2147483647) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofAFloat = FloatArray(
-        #     elements=[self.rnd.choice(self.FLOAT_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofADouble = DoubleArray(
-        #     elements=[self.rnd.choice(self.DOUBLE_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofALong = LongArray(
-        #     elements=[self.rnd.choice(self.LONG_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofAShort = ShortArray(
-        #     elements=[self.rnd.choice(self.SHORT_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofAByte = ByteArray(
-        #     elements=[self.rnd.choice(self.BYTE_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofABoolean = BooleanArray(
-        #     elements=[self.rnd.choice(self.BOOL_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
-        # akof.akofAString = StringArray(
-        #     elements=[self.rnd.choice(self.STRING_VALUES) for i in range(0, self.rnd.randint(1, 3))]
-        # )
+        akof.akofAInt = IntegerArray(
+            elements=[self.rnd.randint(-2147483648, 2147483647) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofAFloat = FloatArray(
+            elements=[self.rnd.choice(self.FLOAT_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofADouble = DoubleArray(
+            elements=[self.rnd.choice(self.DOUBLE_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofALong = LongArray(
+            elements=[self.rnd.choice(self.LONG_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofAShort = ShortArray(
+            elements=[self.rnd.choice(self.SHORT_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofAByte = ByteArray(
+            elements=[self.rnd.choice(self.BYTE_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofABoolean = BooleanArray(
+            elements=[self.rnd.choice(self.BOOL_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
+        akof.akofAString = StringArray(
+            elements=[self.rnd.choice(self.STRING_VALUES) for i in range(0, self.rnd.randint(1, 3))]
+        )
         return akof
diff --git a/tests/test_json.py b/tests/test_json.py
index 4b10495..f9591fc 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -25,7 +25,7 @@ def test_deserialization_serialization(json_path):
 
     actual_json = cas.to_json()
 
-    assert_json_equal(actual_json, expected_json)
+    assert_json_equal(actual_json, expected_json, sort_keys=True)
 
 
 def test_multi_type_random_serialization_deserialization():
diff --git a/tests/util.py b/tests/util.py
index 129cc84..1c84f89 100644
--- a/tests/util.py
+++ b/tests/util.py
@@ -40,7 +40,7 @@ def assert_xml_equal(actual: Union[IO, str], expected: Union[IO, str]):
         raise e
 
 
-def assert_json_equal(actual: str, expected: Union[IO, str]):
+def assert_json_equal(actual: str, expected: Union[IO, str], sort_keys: bool = False):
     """Checks whether the JSON trees behind `actual` and `expected` are equal.
 
     Args:
@@ -56,8 +56,8 @@ def assert_json_equal(actual: str, expected: Union[IO, str]):
     if isinstance(expected, str):
         expected = json.loads(expected)
 
-    actual_json = json.dumps(actual, sort_keys=True, indent=2)
-    expected_json = json.dumps(expected, sort_keys=True, indent=2)
+    actual_json = json.dumps(actual, sort_keys=sort_keys, indent=2)
+    expected_json = json.dumps(expected, sort_keys=sort_keys, indent=2)
 
     try:
         assert actual_json == expected_json

From 59ceea4e47e6a9abb65774c3c381b441af000da3 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 20 Aug 2021 11:53:07 +0200
Subject: [PATCH 09/22] #168 - Experimental JSON CAS support

- Change view members field name
---
 cassis/json.py                                              | 6 +++---
 .../json/fs_as_array/ser-ref/casWithSofaDataArray/data.json | 2 +-
 .../json/fs_as_array/ser-ref/casWithSofaDataURI/data.json   | 2 +-
 .../json/fs_as_array/ser-ref/casWithText/data.json          | 2 +-
 .../fs_as_array/ser-ref/casWithTextAndAnnotation/data.json  | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cassis/json.py b/cassis/json.py
index bd7655f..320e0f8 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -13,7 +13,7 @@
 FEATURES_FIELD = RESERVED_FIELD_PREFIX + "FEATURES"
 VIEWS_FIELD = RESERVED_FIELD_PREFIX + "VIEWS"
 VIEW_SOFA_FIELD = RESERVED_FIELD_PREFIX + "SOFA"
-VIEW_INDEX_FIELD = RESERVED_FIELD_PREFIX + "INDEX"
+VIEW_MEMBERS_FIELD = RESERVED_FIELD_PREFIX + "MEMBERS"
 FEATURE_STRUCTURES_FIELD = RESERVED_FIELD_PREFIX + "FEATURE_STRUCTURES"
 REF_FEATURE_PREFIX = "@"
 NAME_FIELD = RESERVED_FIELD_PREFIX + "NAME"
@@ -144,7 +144,7 @@ def _get_or_create_view(
 
     def _parse_view(self, cas: Cas, view_name: str, json_view: Dict[str, any], feature_structures: Dict[str, any]):
         view = self._get_or_create_view(cas, view_name)
-        for member_id in json_view[VIEW_INDEX_FIELD]:
+        for member_id in json_view[VIEW_MEMBERS_FIELD]:
             fs = feature_structures[member_id]
             view.add_annotation(fs, keep_id=True)
 
@@ -365,7 +365,7 @@ def _serialize_ref(self, fs) -> int:
 
 
     def _serialize_view(self, view: View):
-        return {VIEW_SOFA_FIELD: view.sofa.xmiID, VIEW_INDEX_FIELD: sorted(x.xmiID for x in view.get_all_annotations())}
+        return {VIEW_SOFA_FIELD: view.sofa.xmiID, VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_annotations())}
 
     def _to_external_type_name(self, type_name: str):
         if type_name.startswith("uima.noNamespace."):
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
index 054d442..edf6ddc 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
@@ -3,7 +3,7 @@
   "%VIEWS" : {
     "_InitialView" : {
       "%SOFA" : 1,
-      "%INDEX" : [ ]
+      "%MEMBERS" : [ ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
index 9375241..266ab55 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
@@ -3,7 +3,7 @@
   "%VIEWS" : {
     "_InitialView" : {
       "%SOFA" : 1,
-      "%INDEX" : [ ]
+      "%MEMBERS" : [ ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
index c8fd81a..1fe9f02 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
@@ -3,7 +3,7 @@
   "%VIEWS" : {
     "_InitialView" : {
       "%SOFA" : 1,
-      "%INDEX" : [ 2 ]
+      "%MEMBERS" : [ 2 ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
index 8debb1f..7879a33 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
@@ -3,7 +3,7 @@
   "%VIEWS" : {
     "_InitialView" : {
       "%SOFA" : 1,
-      "%INDEX" : [ 2, 3 ]
+      "%MEMBERS" : [ 2, 3 ]
     }
   },
   "%FEATURE_STRUCTURES" : [ {

From f42992f1bd2a82ef1b8219669f561101a7f260ba Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Wed, 25 Aug 2021 11:20:31 +0200
Subject: [PATCH 10/22] #168 - Experimental JSON CAS support

- Formatting
- Removed unused import
---
 cassis/json.py     |  6 ++++--
 cassis/xmi.py      | 10 ++++++++--
 tests/test_json.py |  1 -
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/cassis/json.py b/cassis/json.py
index 320e0f8..b09bf42 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -363,9 +363,11 @@ def _serialize_ref(self, fs) -> int:
 
         return fs.xmiID
 
-
     def _serialize_view(self, view: View):
-        return {VIEW_SOFA_FIELD: view.sofa.xmiID, VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_annotations())}
+        return {
+            VIEW_SOFA_FIELD: view.sofa.xmiID,
+            VIEW_MEMBERS_FIELD: sorted(x.xmiID for x in view.get_all_annotations()),
+        }
 
     def _to_external_type_name(self, type_name: str):
         if type_name.startswith("uima.noNamespace."):
diff --git a/cassis/xmi.py b/cassis/xmi.py
index 9c7c753..9d4bf50 100644
--- a/cassis/xmi.py
+++ b/cassis/xmi.py
@@ -478,9 +478,15 @@ def _serialize_feature_structure(self, cas: Cas, root: etree.Element, fs: Featur
                 for e in value.elements:
                     child = etree.SubElement(elem, feature_name)
                     child.text = e
-            elif ts.is_primitive_array(feature.rangeTypeName) and not feature.multipleReferencesAllowed and value.elements:
+            elif (
+                ts.is_primitive_array(feature.rangeTypeName)
+                and not feature.multipleReferencesAllowed
+                and value.elements
+            ):
                 elem.attrib[feature_name] = self._serialize_primitive_array(feature.rangeTypeName, value.elements)
-            elif feature.rangeTypeName == "uima.cas.FSArray" and not feature.multipleReferencesAllowed and value.elements:
+            elif (
+                feature.rangeTypeName == "uima.cas.FSArray" and not feature.multipleReferencesAllowed and value.elements
+            ):
                 elem.attrib[feature_name] = " ".join(str(e.xmiID) for e in value.elements)
             elif feature_name == "sofa":
                 elem.attrib[feature_name] = str(value.xmiID)
diff --git a/tests/test_json.py b/tests/test_json.py
index f9591fc..0765ca3 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -1,6 +1,5 @@
 import json
 
-from cassis.cas import NAME_DEFAULT_SOFA
 from tests.fixtures import *
 from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
 from tests.util import assert_json_equal

From dafd693abd459110ba6a26e271f58e4585ba1277 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Wed, 25 Aug 2021 12:35:08 +0200
Subject: [PATCH 11/22] #168 - Experimental JSON CAS support

- Adjust to changes from #190
---
 cassis/json.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/cassis/json.py b/cassis/json.py
index b09bf42..ab66ef9 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -272,7 +272,7 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
 
     def _serialize_type(self, type_: Type):
         type_name = self._to_external_type_name(type_.name)
-        supertype_name = self._to_external_type_name(type_.supertypeName)
+        supertype_name = self._to_external_type_name(type_.supertype.name)
 
         json_type = {
             NAME_FIELD: type_name,
@@ -310,27 +310,27 @@ def _serialize_feature(self, json_type, feature: Feature):
         return json_feature
 
     def _serialize_feature_structure(self, cas, fs) -> dict:
+        ts = cas.typesystem
+        type_name = fs.type.name
+
         json_fs = OrderedDict()
         json_fs[ID_FIELD] = fs.xmiID
-        json_fs[TYPE_FIELD] = fs.type
+        json_fs[TYPE_FIELD] = type_name
 
-        ts = cas.typesystem
-        t = ts.get_type(fs.type)
-
-        if t.name == TYPE_NAME_BYTE_ARRAY:
+        if type_name == TYPE_NAME_BYTE_ARRAY:
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = base64.b64encode(bytes(fs.elements)).decode("ascii")
             return json_fs
-        elif ts.is_primitive_array(t.name):
+        elif ts.is_primitive_array(type_name):
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = fs.elements
             return json_fs
-        elif TYPE_NAME_FS_ARRAY == t.name:
+        elif TYPE_NAME_FS_ARRAY == type_name:
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = [self._serialize_ref(e) for e in fs.elements]
             return json_fs
 
-        for feature in t.all_features:
+        for feature in fs.type.all_features:
             if feature.name in CasJsonSerializer._COMMON_FIELD_NAMES:
                 continue
 

From 026fb9df5c27ea6e803e955e1ceb31299246b9b8 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Wed, 25 Aug 2021 15:01:27 +0200
Subject: [PATCH 12/22] #168 - Experimental JSON CAS support

- Adjust to changes from #190
---
 cassis/json.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/cassis/json.py b/cassis/json.py
index ab66ef9..4f962ab 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -122,7 +122,7 @@ def _parse_type(self, typesystem: TypeSystem, type_name: str, json_type: Dict[st
             typesystem.create_feature(
                 new_type,
                 name=key,
-                rangeTypeName=json_feature[RANGE_FIELD],
+                rangeType=json_feature[RANGE_FIELD],
                 description=json_feature.get(DESCRIPTION_FIELD),
                 elementType=json_feature.get(ELEMENT_TYPE_FIELD),
                 multipleReferencesAllowed=json_feature.get(MULTIPLE_REFERENCES_ALLOWED_FIELD),
@@ -249,14 +249,14 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
         for view in cas.views:
             views[view.sofa.sofaID] = self._serialize_view(view)
             if view.sofa.sofaArray:
-                json_sofa_array_fs = self._serialize_feature_structure(cas, view.sofa.sofaArray)
+                json_sofa_array_fs = self._serialize_feature_structure(view.sofa.sofaArray)
                 feature_structures.append(json_sofa_array_fs)
-            json_sofa_fs = self._serialize_feature_structure(cas, view.sofa)
+            json_sofa_fs = self._serialize_feature_structure(view.sofa)
             feature_structures.append(json_sofa_fs)
 
         # Find all fs, even the ones that are not directly added to a sofa
         for fs in sorted(cas._find_all_fs(include_inlinable_arrays=True), key=lambda a: a.xmiID):
-            json_fs = self._serialize_feature_structure(cas, fs)
+            json_fs = self._serialize_feature_structure(fs)
             feature_structures.append(json_fs)
 
         if isinstance(sink, BytesIO):
@@ -295,7 +295,7 @@ def _serialize_feature(self, json_type, feature: Feature):
 
         json_feature = {
             NAME_FIELD: feature_name,
-            RANGE_FIELD: self._to_external_type_name(feature.rangeTypeName),
+            RANGE_FIELD: self._to_external_type_name(feature.rangeType.name),
         }
 
         if feature.description:
@@ -309,8 +309,7 @@ def _serialize_feature(self, json_type, feature: Feature):
 
         return json_feature
 
-    def _serialize_feature_structure(self, cas, fs) -> dict:
-        ts = cas.typesystem
+    def _serialize_feature_structure(self, fs) -> dict:
         type_name = fs.type.name
 
         json_fs = OrderedDict()
@@ -321,7 +320,7 @@ def _serialize_feature_structure(self, cas, fs) -> dict:
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = base64.b64encode(bytes(fs.elements)).decode("ascii")
             return json_fs
-        elif ts.is_primitive_array(type_name):
+        elif is_primitive_array(fs.type):
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = fs.elements
             return json_fs
@@ -350,7 +349,7 @@ def _serialize_feature_structure(self, cas, fs) -> dict:
             #    sofa: Sofa = getattr(fs, "sofa")
             #    value = sofa._offset_converter.cassis_to_uima(value)
 
-            if ts.is_primitive(feature.rangeTypeName):
+            if is_primitive(feature.rangeType):
                 json_fs[feature_name] = value
             else:
                 # We need to encode non-primitive features as a reference

From 88ec59b379463e33ff00d5c0cc8ca0ca3f306406 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Wed, 25 Aug 2021 22:39:30 +0200
Subject: [PATCH 13/22] #168 - Experimental JSON CAS support

- Tune performance when serializing JSON to string instead of writing to disk
- Added rudimentary performance "test"
---
 cassis/cas.py        |  4 +--
 cassis/json.py       |  6 ++--
 cassis/xmi.py        | 11 +++++++-
 tests/performance.py | 67 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 tests/performance.py

diff --git a/cassis/cas.py b/cassis/cas.py
index 34fe87f..b2e1825 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -604,9 +604,7 @@ def _serialize(self, serializer, path: Union[str, Path, None] = None, pretty_pri
         """
         # If `path` is None, then serialize to a string and return it
         if path is None:
-            sink = BytesIO()
-            serializer.serialize(sink, self, pretty_print=pretty_print)
-            return sink.getvalue().decode("utf-8")
+            return serializer.serialize(None, self, pretty_print=pretty_print)
         elif isinstance(path, str):
             with open(path, "wb") as f:
                 serializer.serialize(f, self, pretty_print=pretty_print)
diff --git a/cassis/json.py b/cassis/json.py
index 4f962ab..4cd74e2 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -234,7 +234,7 @@ class CasJsonSerializer:
     def __init__(self):
         pass
 
-    def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
+    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
         data = {}
         types = data[TYPES_FIELD] = {}
         views = data[VIEWS_FIELD] = {}
@@ -265,11 +265,13 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
         if sink:
             json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None)
         else:
-            json.dumps(data, sort_keys=False, indent=2 if pretty_print else None)
+            return json.dumps(data, sort_keys=False, indent=2 if pretty_print else None)
 
         if isinstance(sink, TextIOWrapper):
             sink.detach()  # Prevent TextIOWrapper from closing the BytesIO
 
+        return None
+
     def _serialize_type(self, type_: Type):
         type_name = self._to_external_type_name(type_.name)
         supertype_name = self._to_external_type_name(type_.supertype.name)
diff --git a/cassis/xmi.py b/cassis/xmi.py
index 3ea65a5..72448ac 100644
--- a/cassis/xmi.py
+++ b/cassis/xmi.py
@@ -363,7 +363,7 @@ def __init__(self):
         self._urls_to_prefixes = {}
         self._duplicate_namespaces = defaultdict(int)
 
-    def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
+    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
         xmi_attrs = {"{http://www.omg.org/XMI}version": "2.0"}
 
         root = etree.Element(etree.QName(self._nsmap["xmi"], "XMI"), nsmap=self._nsmap, **xmi_attrs)
@@ -383,8 +383,17 @@ def serialize(self, sink: Union[IO, str], cas: Cas, pretty_print=True):
         doc = etree.ElementTree(root)
         etree.cleanup_namespaces(doc, top_nsmap=self._nsmap)
 
+        return_str = sink is None
+        if return_str:
+            sink = BytesIO()
+
         doc.write(sink, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
 
+        if return_str:
+            return sink.getvalue().decode("utf-8")
+
+        return None
+
     def _serialize_cas_null(self, root: etree.Element):
         name = etree.QName(self._nsmap["cas"], "NULL")
         elem = etree.SubElement(root, name)
diff --git a/tests/performance.py b/tests/performance.py
new file mode 100644
index 0000000..6a1b289
--- /dev/null
+++ b/tests/performance.py
@@ -0,0 +1,67 @@
+from random import Random
+from timeit import default_timer as timer
+
+from cassis import load_cas_from_json, load_cas_from_xmi
+from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator
+
+generator = MultiFeatureRandomCasGenerator()
+generator.rnd = Random(123456)
+generator.size = 1000
+iterations = 100
+
+typesystem = generator.generate_type_system()
+randomized_cas = generator.generate_cas(typesystem)
+randomized_cas_xmi = randomized_cas.to_xmi()
+randomized_cas_json = randomized_cas.to_json()
+
+
+def test_xmi_serialization_performance():
+    start = timer()
+    for i in range(0, iterations):
+        if i % 10 == 0:
+            print(".", end='')
+        if i % 100 == 0:
+            print(f"{i}")
+        randomized_cas.to_xmi()
+    end = timer()
+
+    print(f"XMI: Serializing {iterations} CASes took {end - start} seconds")
+
+
+def test_json_serialization_performance():
+    start = timer()
+    for i in range(0, iterations):
+        if i % 10 == 0:
+            print(".", end='')
+        if i % 100 == 0:
+            print(f"{i}")
+        randomized_cas.to_json()
+    end = timer()
+
+    print(f"JSON: Serializing {iterations} CASes took {end - start} seconds")
+
+
+def test_xmi_deserialization_performance():
+    start = timer()
+    for i in range(0, iterations):
+        if i % 10 == 0:
+            print(".", end='')
+        if i % 100 == 0:
+            print(f"{i}")
+        load_cas_from_xmi(randomized_cas_xmi, typesystem)
+    end = timer()
+
+    print(f"XMI: Deserializing {iterations} CASes took {end - start} seconds")
+
+
+def test_json_deserialization_performance():
+    start = timer()
+    for i in range(0, iterations):
+        if i % 10 == 0:
+            print(".", end='')
+        if i % 100 == 0:
+            print(f"{i}")
+        load_cas_from_json(randomized_cas_json, typesystem)
+    end = timer()
+
+    print(f"JSON: Deserializing {iterations} CASes took {end - start} seconds")

From 053bbf73e9c2df9f064c376709a937c6184fc1ad Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Mon, 30 Aug 2021 16:46:44 +0200
Subject: [PATCH 14/22] [UIMA-6266] Clean JSON Wire Format for CAS

- Do not execute performance "tests" when running make test
- Update JSON reference data with new data from UIMA Java SDK - including CAS examples using emojis and other Unicode characters
- Enabled character offset conversion on import/export in JSON (de)serializer
---
 Makefile                                      |   2 +-
 cassis/cas.py                                 |  36 +++---
 cassis/json.py                                |  21 +++-
 tests/performance.py                          |  22 +---
 .../data.json                                 |  78 ++++++++++++
 .../debug-typesystem.xml                      |   0
 .../debug.xmi                                 |  15 +++
 .../data.json                                 |  30 +++--
 .../debug-typesystem.xml                      |  17 +++
 .../debug.xmi                                 |   8 ++
 .../ser-ref/casWithSofaDataArray/data.json    |  20 ++--
 .../ser-ref/casWithSofaDataURI/data.json      |  14 +--
 .../fs_as_array/ser-ref/casWithText/data.json |  14 +--
 .../casWithTextAndAnnotation/debug.xmi        |   7 --
 .../casWithTextAndAnnotations/data.json       |  48 ++++++++
 .../debug-typesystem.xml                      |  17 +++
 .../casWithTextAndAnnotations/debug.xmi       |   9 ++
 .../data.json                                 |  48 ++++++++
 .../debug-typesystem.xml                      |  17 +++
 .../debug.xmi                                 |  10 ++
 .../data.json                                 |  39 ++++++
 .../debug-typesystem.xml                      |  17 +++
 .../debug.xmi                                 |   9 ++
 .../fs_as_array/ser-ref/emptyCas/data.json    |   9 ++
 .../ser-ref/emptyCas/debug-typesystem.xml     |  17 +++
 .../fs_as_array/ser-ref/emptyCas/debug.xmi    |   3 +
 tests/test_json.py                            | 111 ++++++++++++++++--
 27 files changed, 550 insertions(+), 88 deletions(-)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json
 rename tests/test_files/json/fs_as_array/ser-ref/{casWithTextAndAnnotation => casWithEmojiUnicodeTextAndAnnotations}/debug-typesystem.xml (100%)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi
 rename tests/test_files/json/fs_as_array/ser-ref/{casWithTextAndAnnotation => casWithLeftToRightTextAndAnnotations}/data.json (67%)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi
 delete mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi

diff --git a/Makefile b/Makefile
index affc02b..584220e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 PYTHON_FILES = cassis tests
 
 test:
-	python -m pytest tests/
+	python -m pytest -m "not performance" tests/
 
 format:
 	black -l 120 cassis/
diff --git a/cassis/cas.py b/cassis/cas.py
index b2e1825..a65dcce 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -1,6 +1,5 @@
 import sys
 from collections import defaultdict
-from io import BytesIO
 from pathlib import Path
 from typing import Dict, Iterable, List, Optional, Tuple, Union
 
@@ -37,16 +36,16 @@ class OffsetConverter:
     """
 
     def __init__(self):
-        self._uima_to_cassis: Dict[int, int] = {0: 0}
-        self._cassis_to_uima: Dict[int, int] = {0: 0}
+        self._uima_to_cassis: Union[Dict[int, int], None] = None
+        self._cassis_to_uima: Union[Dict[int, int], None] = None
 
     def create_index(self, sofa_string: str):
-        self._uima_to_cassis.clear()
-        self._cassis_to_uima.clear()
-
         if sofa_string is None:
             return
 
+        self._uima_to_cassis = {0: 0}
+        self._cassis_to_uima = {0: 0}
+
         count_uima = 0
         count_cassis = 0
 
@@ -67,11 +66,19 @@ def create_index(self, sofa_string: str):
     def uima_to_cassis(self, idx: Optional[int]) -> Optional[int]:
         if idx is None:
             return None
+
+        if self._uima_to_cassis is None:
+            return idx
+
         return self._uima_to_cassis[idx]
 
     def cassis_to_uima(self, idx: Optional[int]) -> Optional[int]:
         if idx is None:
             return None
+
+        if self._cassis_to_uima is None:
+            return idx
+
         return self._cassis_to_uima[idx]
 
 
@@ -572,9 +579,11 @@ def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False
         """
         from cassis.xmi import CasXmiSerializer
 
-        return self._serialize(CasXmiSerializer(), path, pretty_print)
+        return self._serialize(CasXmiSerializer(), path, pretty_print=pretty_print)
 
-    def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = False) -> Optional[str]:
+    def to_json(
+        self, path: Union[str, Path, None] = None, pretty_print: bool = False, ensure_ascii=False
+    ) -> Optional[str]:
         """Creates a JSON representation of this CAS.
 
         Args:
@@ -588,14 +597,13 @@ def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = Fals
         """
         from cassis.json import CasJsonSerializer
 
-        return self._serialize(CasJsonSerializer(), path, pretty_print)
+        return self._serialize(CasJsonSerializer(), path, pretty_print=pretty_print, ensure_ascii=ensure_ascii)
 
-    def _serialize(self, serializer, path: Union[str, Path, None] = None, pretty_print: bool = False):
+    def _serialize(self, serializer, path: Union[str, Path, None] = None, **kwargs):
         """Runs this CAS through the given serializer.
 
         Args:
             path: File path, if `None` is provided the result is returned as a string
-            pretty_print: `True` if the resulting data should be pretty-printed, else `False`
 
 
         Returns:
@@ -604,13 +612,13 @@ def _serialize(self, serializer, path: Union[str, Path, None] = None, pretty_pri
         """
         # If `path` is None, then serialize to a string and return it
         if path is None:
-            return serializer.serialize(None, self, pretty_print=pretty_print)
+            return serializer.serialize(None, self, **kwargs)
         elif isinstance(path, str):
             with open(path, "wb") as f:
-                serializer.serialize(f, self, pretty_print=pretty_print)
+                serializer.serialize(f, self, **kwargs)
         elif isinstance(path, Path):
             with path.open("wb") as f:
-                serializer.serialize(f, self, pretty_print=pretty_print)
+                serializer.serialize(f, self, **kwargs)
         else:
             raise TypeError("`path` needs to be one of [str, None, Path], but was <{0}>".format(type(path)))
 
diff --git a/cassis/json.py b/cassis/json.py
index 4cd74e2..d4e2fee 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -198,6 +198,13 @@ def fix_up(elements):
         fs = AnnotationType(**attributes)
 
         self._resolve_references(fs, ref_features, feature_structures)
+
+        # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
+        if typesystem.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION):
+            sofa = fs.sofa
+            fs.begin = sofa._offset_converter.uima_to_cassis(fs.begin)
+            fs.end = sofa._offset_converter.uima_to_cassis(fs.end)
+
         return fs
 
     def _parse_primitive_array(self, type_name: str, elements: [list, str]) -> List:
@@ -234,7 +241,9 @@ class CasJsonSerializer:
     def __init__(self):
         pass
 
-    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
+    def serialize(
+        self, sink: Union[IO, str, None], cas: Cas, pretty_print: bool = True, ensure_ascii: bool = False
+    ) -> Union[str, None]:
         data = {}
         types = data[TYPES_FIELD] = {}
         views = data[VIEWS_FIELD] = {}
@@ -263,9 +272,9 @@ def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) ->
             sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
 
         if sink:
-            json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None)
+            json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii)
         else:
-            return json.dumps(data, sort_keys=False, indent=2 if pretty_print else None)
+            return json.dumps(data, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii)
 
         if isinstance(sink, TextIOWrapper):
             sink.detach()  # Prevent TextIOWrapper from closing the BytesIO
@@ -347,9 +356,9 @@ def _serialize_feature_structure(self, fs) -> dict:
                 continue
 
             # Map back from offsets in Unicode codepoints to UIMA UTF-16 based offsets
-            # if ts.is_instance_of(fs.type, "uima.tcas.Annotation") and feature_name == "begin" or feature_name == "end":
-            #    sofa: Sofa = getattr(fs, "sofa")
-            #    value = sofa._offset_converter.cassis_to_uima(value)
+            if feature.domainType.name == TYPE_NAME_ANNOTATION and feature_name in ("begin", "end"):
+                sofa: Sofa = getattr(fs, "sofa")
+                value = sofa._offset_converter.cassis_to_uima(value)
 
             if is_primitive(feature.rangeType):
                 json_fs[feature_name] = value
diff --git a/tests/performance.py b/tests/performance.py
index 6a1b289..aaff08a 100644
--- a/tests/performance.py
+++ b/tests/performance.py
@@ -1,6 +1,8 @@
 from random import Random
 from timeit import default_timer as timer
 
+import pytest
+
 from cassis import load_cas_from_json, load_cas_from_xmi
 from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator
 
@@ -15,52 +17,40 @@
 randomized_cas_json = randomized_cas.to_json()
 
 
+@pytest.mark.performance
 def test_xmi_serialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         randomized_cas.to_xmi()
     end = timer()
 
     print(f"XMI: Serializing {iterations} CASes took {end - start} seconds")
 
 
+@pytest.mark.performance
 def test_json_serialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         randomized_cas.to_json()
     end = timer()
 
     print(f"JSON: Serializing {iterations} CASes took {end - start} seconds")
 
 
+@pytest.mark.performance
 def test_xmi_deserialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         load_cas_from_xmi(randomized_cas_xmi, typesystem)
     end = timer()
 
     print(f"XMI: Deserializing {iterations} CASes took {end - start} seconds")
 
 
+@pytest.mark.performance
 def test_json_deserialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         load_cas_from_json(randomized_cas_json, typesystem)
     end = timer()
 
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json
new file mode 100644
index 0000000..422cea5
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json
@@ -0,0 +1,78 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "\uD83E\uDD73 This \uD83D\uDC73\uD83C\uDFFB‍♀️ is ✆ a \uD83E\uDDD4\uD83C\uDFFE‍♂️ test \uD83D\uDC7B"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 2
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 3,
+    "end" : 7
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 8,
+    "end" : 15
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 16,
+    "end" : 18
+  }, {
+    "%ID" : 6,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 19,
+    "end" : 20
+  }, {
+    "%ID" : 7,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 21,
+    "end" : 22
+  }, {
+    "%ID" : 8,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 23,
+    "end" : 30
+  }, {
+    "%ID" : 9,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 31,
+    "end" : 35
+  }, {
+    "%ID" : 10,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 36,
+    "end" : 38
+  }, {
+    "%ID" : 11,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 38,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug-typesystem.xml
similarity index 100%
rename from tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml
rename to tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug-typesystem.xml
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..6d8ec43
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="2"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="3" end="7"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="8" end="15"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="16" end="18"/>
+    <tcas:Annotation xmi:id="6" sofa="1" begin="19" end="20"/>
+    <tcas:Annotation xmi:id="7" sofa="1" begin="21" end="22"/>
+    <tcas:Annotation xmi:id="8" sofa="1" begin="23" end="30"/>
+    <tcas:Annotation xmi:id="9" sofa="1" begin="31" end="35"/>
+    <tcas:Annotation xmi:id="10" sofa="1" begin="36" end="38"/>
+    <tcas:DocumentAnnotation xmi:id="11" sofa="1" begin="0" end="38" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="&#129395; This &#128115;&#127995;‍♀️ is ✆ a &#129492;&#127998;‍♂️ test &#128123;"/>
+    <cas:View sofa="1" members="2 3 4 5 6 7 8 9 10 11"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/data.json
similarity index 67%
rename from tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
rename to tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/data.json
index 7879a33..1944181 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/data.json
@@ -1,30 +1,36 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ 2, 3 ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
     "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text",
-    "sofaString" : "This is a test."
+    "sofaString" : "هذا اختبار"
   }, {
     "%ID" : 2,
-    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "%TYPE" : "uima.tcas.Annotation",
     "@sofa" : 1,
     "begin" : 0,
-    "end" : 15,
-    "language" : "x-unspecified"
+    "end" : 3
   }, {
     "%ID" : 3,
     "%TYPE" : "uima.tcas.Annotation",
     "@sofa" : 1,
+    "begin" : 4,
+    "end" : 10
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
     "begin" : 0,
-    "end" : 15
-  } ]
+    "end" : 10,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4 ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..108d362
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="3"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="4" end="10"/>
+    <tcas:DocumentAnnotation xmi:id="4" sofa="1" begin="0" end="10" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="هذا اختبار"/>
+    <cas:View sofa="1" members="2 3 4"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
index edf6ddc..20d935b 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
@@ -1,21 +1,21 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
-    "%ID" : 2,
+    "%ID" : 1,
     "%TYPE" : "uima.cas.ByteArray",
     "%ELEMENTS" : "VGhpcyBpcyBhIHRlc3Q="
   }, {
-    "%ID" : 1,
+    "%ID" : 2,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text/plain",
-    "@sofaArray" : 2
-  } ]
+    "@sofaArray" : 1
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 2,
+      "%MEMBERS" : [ ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
index 266ab55..0b142a8 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
@@ -1,11 +1,5 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
     "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
@@ -13,5 +7,11 @@
     "sofaID" : "_InitialView",
     "mimeType" : "text/plain",
     "sofaURI" : "classpath:/ProgrammaticallyCreatedCasDataSuite/document.txt"
-  } ]
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
index 1fe9f02..39f5ffe 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
@@ -1,11 +1,5 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ 2 ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
     "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
@@ -20,5 +14,11 @@
     "begin" : 0,
     "end" : 15,
     "language" : "x-unspecified"
-  } ]
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2 ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
deleted file mode 100644
index 7292031..0000000
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="15" language="x-unspecified"/>
-    <tcas:Annotation xmi:id="3" sofa="1" begin="0" end="15"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="This is a test."/>
-    <cas:View sofa="1" members="2 3"/>
-</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json
new file mode 100644
index 0000000..a9522cf
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json
@@ -0,0 +1,48 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "This is a test"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 4
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 5,
+    "end" : 7
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 8,
+    "end" : 9
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 10,
+    "end" : 14
+  }, {
+    "%ID" : 6,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 14,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5, 6 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..37c1e9b
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="4"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="5" end="7"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="8" end="9"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="10" end="14"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView"/>
+    <cas:View sofa="1" members="2 3 4 5"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json
new file mode 100644
index 0000000..d586738
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json
@@ -0,0 +1,48 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "這是一個測試"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 1
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 1,
+    "end" : 2
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 2,
+    "end" : 4
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 4,
+    "end" : 6
+  }, {
+    "%ID" : 6,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 6,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5, 6 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..0087d72
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="1"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="1" end="2"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="2" end="4"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="4" end="6"/>
+    <tcas:DocumentAnnotation xmi:id="6" sofa="1" begin="0" end="6" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="這是一個測試"/>
+    <cas:View sofa="1" members="2 3 4 5 6"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json
new file mode 100644
index 0000000..56784fe
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json
@@ -0,0 +1,39 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 4
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 5,
+    "end" : 7
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 8,
+    "end" : 9
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 10,
+    "end" : 14
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi
new file mode 100644
index 0000000..37c1e9b
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="4"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="5" end="7"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="8" end="9"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="10" end="14"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView"/>
+    <cas:View sofa="1" members="2 3 4 5"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json
new file mode 100644
index 0000000..fcd8582
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json
@@ -0,0 +1,9 @@
+{
+  "%TYPES" : { },
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi
new file mode 100644
index 0000000..6fd88bd
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+</xmi:XMI>
diff --git a/tests/test_json.py b/tests/test_json.py
index 0765ca3..cc02c56 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -1,28 +1,92 @@
 import json
 
+from cassis.typesystem import TYPE_NAME_ANNOTATION
 from tests.fixtures import *
 from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
 from tests.util import assert_json_equal
 
-FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files")
+FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files", "json", "fs_as_array", "ser-ref")
 
 FIXTURES = [
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithSofaDataArray")),
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithSofaDataURI")),
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithText")),
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithTextAndAnnotation")),
+    (os.path.join(FIXTURE_DIR, "casWithSofaDataArray"), []),
+    (os.path.join(FIXTURE_DIR, "casWithSofaDataURI"), []),
+    (os.path.join(FIXTURE_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
+    (
+        os.path.join(FIXTURE_DIR, "casWithoutTextButWithAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 4, None],
+            ["uima.tcas.Annotation", 5, 7, None],
+            ["uima.tcas.Annotation", 8, 9, None],
+            ["uima.tcas.Annotation", 10, 14, None],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 4, "This"],
+            ["uima.tcas.Annotation", 5, 7, "is"],
+            ["uima.tcas.Annotation", 8, 9, "a"],
+            ["uima.tcas.Annotation", 10, 14, "test"],
+            ["uima.tcas.DocumentAnnotation", 0, 14, "This is a test"],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithEmojiUnicodeTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
+            ["uima.tcas.Annotation", 2, 6, "This"],
+            [
+                "uima.tcas.Annotation",
+                7,
+                12,
+                "👳🏻\u200d♀️",
+                b"\xf0\x9f\x91\xb3\xf0\x9f\x8f\xbb\xe2\x80\x8d\xe2\x99\x80\xef\xb8\x8f",
+            ],
+            ["uima.tcas.Annotation", 13, 15, "is"],
+            ["uima.tcas.Annotation", 16, 17, "✆", b"\xe2\x9c\x86"],
+            ["uima.tcas.Annotation", 18, 19, "a"],
+            [
+                "uima.tcas.Annotation",
+                20,
+                25,
+                "🧔🏾\u200d♂️",
+                b"\xf0\x9f\xa7\x94\xf0\x9f\x8f\xbe\xe2\x80\x8d\xe2\x99\x82\xef\xb8\x8f",
+            ],
+            ["uima.tcas.Annotation", 26, 30, "test"],
+            ["uima.tcas.Annotation", 31, 32, "👻", b"\xf0\x9f\x91\xbb"],
+            ["uima.tcas.DocumentAnnotation", 0, 32, "🥳 This 👳🏻\u200d♀️ is ✆ a 🧔🏾\u200d♂️ test 👻"],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithLeftToRightTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 3, "هذا"],
+            ["uima.tcas.Annotation", 4, 10, "اختبار"],
+            ["uima.tcas.DocumentAnnotation", 0, 10, "هذا اختبار"],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithTraditionalChineseTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 1, "這"],
+            ["uima.tcas.Annotation", 1, 2, "是"],
+            ["uima.tcas.Annotation", 2, 4, "一個"],
+            ["uima.tcas.Annotation", 4, 6, "測試"],
+            ["uima.tcas.DocumentAnnotation", 0, 6, "這是一個測試"],
+        ],
+    ),
 ]
 
 
-@pytest.mark.parametrize("json_path", FIXTURES)
-def test_deserialization_serialization(json_path):
+@pytest.mark.parametrize("json_path, annotations", FIXTURES)
+def test_deserialization_serialization(json_path, annotations):
     with open(os.path.join(json_path, "data.json"), "rb") as f:
         cas = load_cas_from_json(f)
 
     with open(os.path.join(json_path, "data.json"), "rb") as f:
         expected_json = json.load(f)
 
-    actual_json = cas.to_json()
+    actual_json = cas.to_json(pretty_print=True)
 
     assert_json_equal(actual_json, expected_json, sort_keys=True)
 
@@ -56,3 +120,34 @@ def test_multi_feature_random_serialization_deserialization():
         actual_json = loaded_cas.to_json()
 
         assert_json_equal(actual_json, expected_json)
+
+
+@pytest.mark.parametrize("json_path, annotations", FIXTURES)
+def test_unicode(json_path, annotations):
+    with open(os.path.join(json_path, "data.json"), "rb") as f:
+        cas = load_cas_from_json(f)
+
+    actual_annotations = [
+        [a.type.name, a.begin, a.end, a.get_covered_text()]
+        for a in sorted(cas.select(TYPE_NAME_ANNOTATION), key=lambda k: k.type.name)
+    ]
+    expected_annotations = [a[0:4] for a in annotations]
+    assert actual_annotations == expected_annotations
+
+    for i in range(0, len(annotations)):
+        expected = annotations[i]
+        actual = actual_annotations[i]
+
+        expected_covered_text = expected[3]
+        actual_covered_text = actual[3]
+
+        if not expected_covered_text:
+            continue
+
+        for n in range(len(actual_covered_text)):
+            print(f"{n}: [{actual_covered_text[n]}] {hex(ord(actual_covered_text[n]))}")
+
+        if len(expected) >= 5:
+            expected_utf8_bytes = expected[4]
+            actual_utf8_bytes = bytes(actual_covered_text, "UTF-8")
+            assert actual_utf8_bytes == expected_utf8_bytes

From 36709b1ac6abf6f471ca37e2c881f58707d6589e Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Mon, 30 Aug 2021 16:46:44 +0200
Subject: [PATCH 15/22] - Do not execute performance "tests" when running make
 test - Update JSON reference data with new data from UIMA Java SDK -
 including CAS examples using emojis and other Unicode characters - Enable
 character offset conversion on import/export in JSON (de)serializer

---
 Makefile                                      |   2 +-
 cassis/cas.py                                 |  36 +++---
 cassis/json.py                                |  21 +++-
 tests/performance.py                          |  22 +---
 .../data.json                                 |  78 ++++++++++++
 .../debug-typesystem.xml                      |   0
 .../debug.xmi                                 |  15 +++
 .../data.json                                 |  30 +++--
 .../debug-typesystem.xml                      |  17 +++
 .../debug.xmi                                 |   8 ++
 .../ser-ref/casWithSofaDataArray/data.json    |  20 ++--
 .../ser-ref/casWithSofaDataURI/data.json      |  14 +--
 .../fs_as_array/ser-ref/casWithText/data.json |  14 +--
 .../casWithTextAndAnnotation/debug.xmi        |   7 --
 .../casWithTextAndAnnotations/data.json       |  48 ++++++++
 .../debug-typesystem.xml                      |  17 +++
 .../casWithTextAndAnnotations/debug.xmi       |   9 ++
 .../data.json                                 |  48 ++++++++
 .../debug-typesystem.xml                      |  17 +++
 .../debug.xmi                                 |  10 ++
 .../data.json                                 |  39 ++++++
 .../debug-typesystem.xml                      |  17 +++
 .../debug.xmi                                 |   9 ++
 .../fs_as_array/ser-ref/emptyCas/data.json    |   9 ++
 .../ser-ref/emptyCas/debug-typesystem.xml     |  17 +++
 .../fs_as_array/ser-ref/emptyCas/debug.xmi    |   3 +
 tests/test_json.py                            | 111 ++++++++++++++++--
 27 files changed, 550 insertions(+), 88 deletions(-)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json
 rename tests/test_files/json/fs_as_array/ser-ref/{casWithTextAndAnnotation => casWithEmojiUnicodeTextAndAnnotations}/debug-typesystem.xml (100%)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi
 rename tests/test_files/json/fs_as_array/ser-ref/{casWithTextAndAnnotation => casWithLeftToRightTextAndAnnotations}/data.json (67%)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi
 delete mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi

diff --git a/Makefile b/Makefile
index affc02b..584220e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 PYTHON_FILES = cassis tests
 
 test:
-	python -m pytest tests/
+	python -m pytest -m "not performance" tests/
 
 format:
 	black -l 120 cassis/
diff --git a/cassis/cas.py b/cassis/cas.py
index b2e1825..a65dcce 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -1,6 +1,5 @@
 import sys
 from collections import defaultdict
-from io import BytesIO
 from pathlib import Path
 from typing import Dict, Iterable, List, Optional, Tuple, Union
 
@@ -37,16 +36,16 @@ class OffsetConverter:
     """
 
     def __init__(self):
-        self._uima_to_cassis: Dict[int, int] = {0: 0}
-        self._cassis_to_uima: Dict[int, int] = {0: 0}
+        self._uima_to_cassis: Union[Dict[int, int], None] = None
+        self._cassis_to_uima: Union[Dict[int, int], None] = None
 
     def create_index(self, sofa_string: str):
-        self._uima_to_cassis.clear()
-        self._cassis_to_uima.clear()
-
         if sofa_string is None:
             return
 
+        self._uima_to_cassis = {0: 0}
+        self._cassis_to_uima = {0: 0}
+
         count_uima = 0
         count_cassis = 0
 
@@ -67,11 +66,19 @@ def create_index(self, sofa_string: str):
     def uima_to_cassis(self, idx: Optional[int]) -> Optional[int]:
         if idx is None:
             return None
+
+        if self._uima_to_cassis is None:
+            return idx
+
         return self._uima_to_cassis[idx]
 
     def cassis_to_uima(self, idx: Optional[int]) -> Optional[int]:
         if idx is None:
             return None
+
+        if self._cassis_to_uima is None:
+            return idx
+
         return self._cassis_to_uima[idx]
 
 
@@ -572,9 +579,11 @@ def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False
         """
         from cassis.xmi import CasXmiSerializer
 
-        return self._serialize(CasXmiSerializer(), path, pretty_print)
+        return self._serialize(CasXmiSerializer(), path, pretty_print=pretty_print)
 
-    def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = False) -> Optional[str]:
+    def to_json(
+        self, path: Union[str, Path, None] = None, pretty_print: bool = False, ensure_ascii=False
+    ) -> Optional[str]:
         """Creates a JSON representation of this CAS.
 
         Args:
@@ -588,14 +597,13 @@ def to_json(self, path: Union[str, Path, None] = None, pretty_print: bool = Fals
         """
         from cassis.json import CasJsonSerializer
 
-        return self._serialize(CasJsonSerializer(), path, pretty_print)
+        return self._serialize(CasJsonSerializer(), path, pretty_print=pretty_print, ensure_ascii=ensure_ascii)
 
-    def _serialize(self, serializer, path: Union[str, Path, None] = None, pretty_print: bool = False):
+    def _serialize(self, serializer, path: Union[str, Path, None] = None, **kwargs):
         """Runs this CAS through the given serializer.
 
         Args:
             path: File path, if `None` is provided the result is returned as a string
-            pretty_print: `True` if the resulting data should be pretty-printed, else `False`
 
 
         Returns:
@@ -604,13 +612,13 @@ def _serialize(self, serializer, path: Union[str, Path, None] = None, pretty_pri
         """
         # If `path` is None, then serialize to a string and return it
         if path is None:
-            return serializer.serialize(None, self, pretty_print=pretty_print)
+            return serializer.serialize(None, self, **kwargs)
         elif isinstance(path, str):
             with open(path, "wb") as f:
-                serializer.serialize(f, self, pretty_print=pretty_print)
+                serializer.serialize(f, self, **kwargs)
         elif isinstance(path, Path):
             with path.open("wb") as f:
-                serializer.serialize(f, self, pretty_print=pretty_print)
+                serializer.serialize(f, self, **kwargs)
         else:
             raise TypeError("`path` needs to be one of [str, None, Path], but was <{0}>".format(type(path)))
 
diff --git a/cassis/json.py b/cassis/json.py
index 4cd74e2..d4e2fee 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -198,6 +198,13 @@ def fix_up(elements):
         fs = AnnotationType(**attributes)
 
         self._resolve_references(fs, ref_features, feature_structures)
+
+        # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
+        if typesystem.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION):
+            sofa = fs.sofa
+            fs.begin = sofa._offset_converter.uima_to_cassis(fs.begin)
+            fs.end = sofa._offset_converter.uima_to_cassis(fs.end)
+
         return fs
 
     def _parse_primitive_array(self, type_name: str, elements: [list, str]) -> List:
@@ -234,7 +241,9 @@ class CasJsonSerializer:
     def __init__(self):
         pass
 
-    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
+    def serialize(
+        self, sink: Union[IO, str, None], cas: Cas, pretty_print: bool = True, ensure_ascii: bool = False
+    ) -> Union[str, None]:
         data = {}
         types = data[TYPES_FIELD] = {}
         views = data[VIEWS_FIELD] = {}
@@ -263,9 +272,9 @@ def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) ->
             sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
 
         if sink:
-            json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None)
+            json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii)
         else:
-            return json.dumps(data, sort_keys=False, indent=2 if pretty_print else None)
+            return json.dumps(data, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii)
 
         if isinstance(sink, TextIOWrapper):
             sink.detach()  # Prevent TextIOWrapper from closing the BytesIO
@@ -347,9 +356,9 @@ def _serialize_feature_structure(self, fs) -> dict:
                 continue
 
             # Map back from offsets in Unicode codepoints to UIMA UTF-16 based offsets
-            # if ts.is_instance_of(fs.type, "uima.tcas.Annotation") and feature_name == "begin" or feature_name == "end":
-            #    sofa: Sofa = getattr(fs, "sofa")
-            #    value = sofa._offset_converter.cassis_to_uima(value)
+            if feature.domainType.name == TYPE_NAME_ANNOTATION and feature_name in ("begin", "end"):
+                sofa: Sofa = getattr(fs, "sofa")
+                value = sofa._offset_converter.cassis_to_uima(value)
 
             if is_primitive(feature.rangeType):
                 json_fs[feature_name] = value
diff --git a/tests/performance.py b/tests/performance.py
index 6a1b289..aaff08a 100644
--- a/tests/performance.py
+++ b/tests/performance.py
@@ -1,6 +1,8 @@
 from random import Random
 from timeit import default_timer as timer
 
+import pytest
+
 from cassis import load_cas_from_json, load_cas_from_xmi
 from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator
 
@@ -15,52 +17,40 @@
 randomized_cas_json = randomized_cas.to_json()
 
 
+@pytest.mark.performance
 def test_xmi_serialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         randomized_cas.to_xmi()
     end = timer()
 
     print(f"XMI: Serializing {iterations} CASes took {end - start} seconds")
 
 
+@pytest.mark.performance
 def test_json_serialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         randomized_cas.to_json()
     end = timer()
 
     print(f"JSON: Serializing {iterations} CASes took {end - start} seconds")
 
 
+@pytest.mark.performance
 def test_xmi_deserialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         load_cas_from_xmi(randomized_cas_xmi, typesystem)
     end = timer()
 
     print(f"XMI: Deserializing {iterations} CASes took {end - start} seconds")
 
 
+@pytest.mark.performance
 def test_json_deserialization_performance():
     start = timer()
     for i in range(0, iterations):
-        if i % 10 == 0:
-            print(".", end='')
-        if i % 100 == 0:
-            print(f"{i}")
         load_cas_from_json(randomized_cas_json, typesystem)
     end = timer()
 
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json
new file mode 100644
index 0000000..422cea5
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/data.json
@@ -0,0 +1,78 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "\uD83E\uDD73 This \uD83D\uDC73\uD83C\uDFFB‍♀️ is ✆ a \uD83E\uDDD4\uD83C\uDFFE‍♂️ test \uD83D\uDC7B"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 2
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 3,
+    "end" : 7
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 8,
+    "end" : 15
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 16,
+    "end" : 18
+  }, {
+    "%ID" : 6,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 19,
+    "end" : 20
+  }, {
+    "%ID" : 7,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 21,
+    "end" : 22
+  }, {
+    "%ID" : 8,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 23,
+    "end" : 30
+  }, {
+    "%ID" : 9,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 31,
+    "end" : 35
+  }, {
+    "%ID" : 10,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 36,
+    "end" : 38
+  }, {
+    "%ID" : 11,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 38,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug-typesystem.xml
similarity index 100%
rename from tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug-typesystem.xml
rename to tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug-typesystem.xml
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..6d8ec43
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithEmojiUnicodeTextAndAnnotations/debug.xmi
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="2"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="3" end="7"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="8" end="15"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="16" end="18"/>
+    <tcas:Annotation xmi:id="6" sofa="1" begin="19" end="20"/>
+    <tcas:Annotation xmi:id="7" sofa="1" begin="21" end="22"/>
+    <tcas:Annotation xmi:id="8" sofa="1" begin="23" end="30"/>
+    <tcas:Annotation xmi:id="9" sofa="1" begin="31" end="35"/>
+    <tcas:Annotation xmi:id="10" sofa="1" begin="36" end="38"/>
+    <tcas:DocumentAnnotation xmi:id="11" sofa="1" begin="0" end="38" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="&#129395; This &#128115;&#127995;‍♀️ is ✆ a &#129492;&#127998;‍♂️ test &#128123;"/>
+    <cas:View sofa="1" members="2 3 4 5 6 7 8 9 10 11"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/data.json
similarity index 67%
rename from tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
rename to tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/data.json
index 7879a33..1944181 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/data.json
@@ -1,30 +1,36 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ 2, 3 ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
     "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text",
-    "sofaString" : "This is a test."
+    "sofaString" : "هذا اختبار"
   }, {
     "%ID" : 2,
-    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "%TYPE" : "uima.tcas.Annotation",
     "@sofa" : 1,
     "begin" : 0,
-    "end" : 15,
-    "language" : "x-unspecified"
+    "end" : 3
   }, {
     "%ID" : 3,
     "%TYPE" : "uima.tcas.Annotation",
     "@sofa" : 1,
+    "begin" : 4,
+    "end" : 10
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
     "begin" : 0,
-    "end" : 15
-  } ]
+    "end" : 10,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4 ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..108d362
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithLeftToRightTextAndAnnotations/debug.xmi
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="3"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="4" end="10"/>
+    <tcas:DocumentAnnotation xmi:id="4" sofa="1" begin="0" end="10" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="هذا اختبار"/>
+    <cas:View sofa="1" members="2 3 4"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
index edf6ddc..20d935b 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataArray/data.json
@@ -1,21 +1,21 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
-    "%ID" : 2,
+    "%ID" : 1,
     "%TYPE" : "uima.cas.ByteArray",
     "%ELEMENTS" : "VGhpcyBpcyBhIHRlc3Q="
   }, {
-    "%ID" : 1,
+    "%ID" : 2,
     "%TYPE" : "uima.cas.Sofa",
     "sofaNum" : 1,
     "sofaID" : "_InitialView",
     "mimeType" : "text/plain",
-    "@sofaArray" : 2
-  } ]
+    "@sofaArray" : 1
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 2,
+      "%MEMBERS" : [ ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
index 266ab55..0b142a8 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithSofaDataURI/data.json
@@ -1,11 +1,5 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
     "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
@@ -13,5 +7,11 @@
     "sofaID" : "_InitialView",
     "mimeType" : "text/plain",
     "sofaURI" : "classpath:/ProgrammaticallyCreatedCasDataSuite/document.txt"
-  } ]
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
index 1fe9f02..39f5ffe 100644
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithText/data.json
@@ -1,11 +1,5 @@
 {
   "%TYPES" : { },
-  "%VIEWS" : {
-    "_InitialView" : {
-      "%SOFA" : 1,
-      "%MEMBERS" : [ 2 ]
-    }
-  },
   "%FEATURE_STRUCTURES" : [ {
     "%ID" : 1,
     "%TYPE" : "uima.cas.Sofa",
@@ -20,5 +14,11 @@
     "begin" : 0,
     "end" : 15,
     "language" : "x-unspecified"
-  } ]
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2 ]
+    }
+  }
 }
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
deleted file mode 100644
index 7292031..0000000
--- a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotation/debug.xmi
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="15" language="x-unspecified"/>
-    <tcas:Annotation xmi:id="3" sofa="1" begin="0" end="15"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="This is a test."/>
-    <cas:View sofa="1" members="2 3"/>
-</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json
new file mode 100644
index 0000000..a9522cf
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/data.json
@@ -0,0 +1,48 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "This is a test"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 4
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 5,
+    "end" : 7
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 8,
+    "end" : 9
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 10,
+    "end" : 14
+  }, {
+    "%ID" : 6,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 14,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5, 6 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..37c1e9b
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTextAndAnnotations/debug.xmi
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="4"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="5" end="7"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="8" end="9"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="10" end="14"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView"/>
+    <cas:View sofa="1" members="2 3 4 5"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json
new file mode 100644
index 0000000..d586738
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/data.json
@@ -0,0 +1,48 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView",
+    "mimeType" : "text",
+    "sofaString" : "這是一個測試"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 1
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 1,
+    "end" : 2
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 2,
+    "end" : 4
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 4,
+    "end" : 6
+  }, {
+    "%ID" : 6,
+    "%TYPE" : "uima.tcas.DocumentAnnotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 6,
+    "language" : "x-unspecified"
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5, 6 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi
new file mode 100644
index 0000000..0087d72
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithTraditionalChineseTextAndAnnotations/debug.xmi
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="1"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="1" end="2"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="2" end="4"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="4" end="6"/>
+    <tcas:DocumentAnnotation xmi:id="6" sofa="1" begin="0" end="6" language="x-unspecified"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="這是一個測試"/>
+    <cas:View sofa="1" members="2 3 4 5 6"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json
new file mode 100644
index 0000000..56784fe
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/data.json
@@ -0,0 +1,39 @@
+{
+  "%TYPES" : { },
+  "%FEATURE_STRUCTURES" : [ {
+    "%ID" : 1,
+    "%TYPE" : "uima.cas.Sofa",
+    "sofaNum" : 1,
+    "sofaID" : "_InitialView"
+  }, {
+    "%ID" : 2,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 0,
+    "end" : 4
+  }, {
+    "%ID" : 3,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 5,
+    "end" : 7
+  }, {
+    "%ID" : 4,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 8,
+    "end" : 9
+  }, {
+    "%ID" : 5,
+    "%TYPE" : "uima.tcas.Annotation",
+    "@sofa" : 1,
+    "begin" : 10,
+    "end" : 14
+  } ],
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ 2, 3, 4, 5 ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi
new file mode 100644
index 0000000..37c1e9b
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithoutTextButWithAnnotations/debug.xmi
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <tcas:Annotation xmi:id="2" sofa="1" begin="0" end="4"/>
+    <tcas:Annotation xmi:id="3" sofa="1" begin="5" end="7"/>
+    <tcas:Annotation xmi:id="4" sofa="1" begin="8" end="9"/>
+    <tcas:Annotation xmi:id="5" sofa="1" begin="10" end="14"/>
+    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView"/>
+    <cas:View sofa="1" members="2 3 4 5"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json
new file mode 100644
index 0000000..fcd8582
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/data.json
@@ -0,0 +1,9 @@
+{
+  "%TYPES" : { },
+  "%VIEWS" : {
+    "_InitialView" : {
+      "%SOFA" : 1,
+      "%MEMBERS" : [ ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml
new file mode 100644
index 0000000..07e327a
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug-typesystem.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi
new file mode 100644
index 0000000..6fd88bd
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/emptyCas/debug.xmi
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+</xmi:XMI>
diff --git a/tests/test_json.py b/tests/test_json.py
index 0765ca3..cc02c56 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -1,28 +1,92 @@
 import json
 
+from cassis.typesystem import TYPE_NAME_ANNOTATION
 from tests.fixtures import *
 from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
 from tests.util import assert_json_equal
 
-FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files")
+FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files", "json", "fs_as_array", "ser-ref")
 
 FIXTURES = [
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithSofaDataArray")),
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithSofaDataURI")),
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithText")),
-    (os.path.join(FIXTURE_DIR, "json", "fs_as_array", "ser-ref", "casWithTextAndAnnotation")),
+    (os.path.join(FIXTURE_DIR, "casWithSofaDataArray"), []),
+    (os.path.join(FIXTURE_DIR, "casWithSofaDataURI"), []),
+    (os.path.join(FIXTURE_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
+    (
+        os.path.join(FIXTURE_DIR, "casWithoutTextButWithAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 4, None],
+            ["uima.tcas.Annotation", 5, 7, None],
+            ["uima.tcas.Annotation", 8, 9, None],
+            ["uima.tcas.Annotation", 10, 14, None],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 4, "This"],
+            ["uima.tcas.Annotation", 5, 7, "is"],
+            ["uima.tcas.Annotation", 8, 9, "a"],
+            ["uima.tcas.Annotation", 10, 14, "test"],
+            ["uima.tcas.DocumentAnnotation", 0, 14, "This is a test"],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithEmojiUnicodeTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
+            ["uima.tcas.Annotation", 2, 6, "This"],
+            [
+                "uima.tcas.Annotation",
+                7,
+                12,
+                "👳🏻\u200d♀️",
+                b"\xf0\x9f\x91\xb3\xf0\x9f\x8f\xbb\xe2\x80\x8d\xe2\x99\x80\xef\xb8\x8f",
+            ],
+            ["uima.tcas.Annotation", 13, 15, "is"],
+            ["uima.tcas.Annotation", 16, 17, "✆", b"\xe2\x9c\x86"],
+            ["uima.tcas.Annotation", 18, 19, "a"],
+            [
+                "uima.tcas.Annotation",
+                20,
+                25,
+                "🧔🏾\u200d♂️",
+                b"\xf0\x9f\xa7\x94\xf0\x9f\x8f\xbe\xe2\x80\x8d\xe2\x99\x82\xef\xb8\x8f",
+            ],
+            ["uima.tcas.Annotation", 26, 30, "test"],
+            ["uima.tcas.Annotation", 31, 32, "👻", b"\xf0\x9f\x91\xbb"],
+            ["uima.tcas.DocumentAnnotation", 0, 32, "🥳 This 👳🏻\u200d♀️ is ✆ a 🧔🏾\u200d♂️ test 👻"],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithLeftToRightTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 3, "هذا"],
+            ["uima.tcas.Annotation", 4, 10, "اختبار"],
+            ["uima.tcas.DocumentAnnotation", 0, 10, "هذا اختبار"],
+        ],
+    ),
+    (
+        os.path.join(FIXTURE_DIR, "casWithTraditionalChineseTextAndAnnotations"),
+        [
+            ["uima.tcas.Annotation", 0, 1, "這"],
+            ["uima.tcas.Annotation", 1, 2, "是"],
+            ["uima.tcas.Annotation", 2, 4, "一個"],
+            ["uima.tcas.Annotation", 4, 6, "測試"],
+            ["uima.tcas.DocumentAnnotation", 0, 6, "這是一個測試"],
+        ],
+    ),
 ]
 
 
-@pytest.mark.parametrize("json_path", FIXTURES)
-def test_deserialization_serialization(json_path):
+@pytest.mark.parametrize("json_path, annotations", FIXTURES)
+def test_deserialization_serialization(json_path, annotations):
     with open(os.path.join(json_path, "data.json"), "rb") as f:
         cas = load_cas_from_json(f)
 
     with open(os.path.join(json_path, "data.json"), "rb") as f:
         expected_json = json.load(f)
 
-    actual_json = cas.to_json()
+    actual_json = cas.to_json(pretty_print=True)
 
     assert_json_equal(actual_json, expected_json, sort_keys=True)
 
@@ -56,3 +120,34 @@ def test_multi_feature_random_serialization_deserialization():
         actual_json = loaded_cas.to_json()
 
         assert_json_equal(actual_json, expected_json)
+
+
+@pytest.mark.parametrize("json_path, annotations", FIXTURES)
+def test_unicode(json_path, annotations):
+    with open(os.path.join(json_path, "data.json"), "rb") as f:
+        cas = load_cas_from_json(f)
+
+    actual_annotations = [
+        [a.type.name, a.begin, a.end, a.get_covered_text()]
+        for a in sorted(cas.select(TYPE_NAME_ANNOTATION), key=lambda k: k.type.name)
+    ]
+    expected_annotations = [a[0:4] for a in annotations]
+    assert actual_annotations == expected_annotations
+
+    for i in range(0, len(annotations)):
+        expected = annotations[i]
+        actual = actual_annotations[i]
+
+        expected_covered_text = expected[3]
+        actual_covered_text = actual[3]
+
+        if not expected_covered_text:
+            continue
+
+        for n in range(len(actual_covered_text)):
+            print(f"{n}: [{actual_covered_text[n]}] {hex(ord(actual_covered_text[n]))}")
+
+        if len(expected) >= 5:
+            expected_utf8_bytes = expected[4]
+            actual_utf8_bytes = bytes(actual_covered_text, "UTF-8")
+            assert actual_utf8_bytes == expected_utf8_bytes

From 381a7ec2d2b0189d439cf233f82c3320deb08346 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 10 Sep 2021 15:38:22 +0200
Subject: [PATCH 16/22] #209 - Parsing an array that was serialized using
 multipleReferencesAllowed=true fails

- Fixed problem by checking the multipleReferencesAllowed feature during deserialization
- Added test
---
 cassis/xmi.py                                 |  4 +--
 tests/fixtures.py                             | 28 +++++++++++++++++++
 ...ystem_with_multiple_references_allowed.xml | 19 +++++++++++++
 ...ltiple_references_allowed_string_array.xmi | 22 +++++++++++++++
 tests/test_xmi.py                             |  8 ++++--
 5 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_files/typesystems/typesystem_with_multiple_references_allowed.xml
 create mode 100644 tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi

diff --git a/cassis/xmi.py b/cassis/xmi.py
index f402a28..c05dd2e 100644
--- a/cassis/xmi.py
+++ b/cassis/xmi.py
@@ -203,7 +203,7 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
                 elif typesystem.is_primitive_array(fs.type) and feature_name == "elements":
                     # Separately rendered arrays (typically used with multipleReferencesAllowed = True)
                     fs[feature_name] = self._parse_primitive_array(fs.type, value)
-                elif typesystem.is_primitive_array(feature.rangeType):
+                elif typesystem.is_primitive_array(feature.rangeType) and not feature.multipleReferencesAllowed:
                     # Array feature rendered inline (multipleReferencesAllowed = False|None)
                     # We also end up here for array features that were rendered as child elements. No need to parse
                     # them again, so we check if the value is still a string (i.e. attribute value) and only then
@@ -337,7 +337,7 @@ def _parse_feature_structure(self, typesystem: TypeSystem, elem, children: Dict[
         return AnnotationType(**attributes)
 
     def _parse_primitive_array(self, type_: Type, value: str) -> List:
-        """Primitive collections are serialized as white space seperated primitive values"""
+        """Primitive collections are serialized as white space separated primitive values"""
 
         # TODO: Use type name global variable here instead of hardcoded string literal
         elements = value.split(" ")
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 3a670f3..9cde40c 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -104,6 +104,20 @@ def cas_with_empty_array_references_xmi(cas_with_empty_array_references_path):
         return f.read()
 
 
+# CAS with multipleReferencesAllowed=true on string array
+
+
+@pytest.fixture
+def cas_with_multiple_references_allowed_string_array_path():
+    return os.path.join(FIXTURE_DIR, "xmi", "cas_with_multiple_references_allowed_string_array.xmi")
+
+
+@pytest.fixture
+def cas_with_multiple_references_allowed_string_array_xmi(cas_with_multiple_references_allowed_string_array_path):
+    with open(cas_with_multiple_references_allowed_string_array_path, "r") as f:
+        return f.read()
+
+
 # CAS with reserved names
 
 
@@ -273,6 +287,20 @@ def typesystem_with_collections_xml(typesystem_with_collections_path):
         return f.read()
 
 
+# CAS with multipleReferencesAllowed=true on string array
+
+
+@pytest.fixture
+def typesystem_with_multiple_references_allowed_path():
+    return os.path.join(FIXTURE_DIR, "typesystems", "typesystem_with_multiple_references_allowed.xml")
+
+
+@pytest.fixture
+def typesystem_with_multiple_references_allowed_xml(typesystem_with_multiple_references_allowed_path):
+    with open(typesystem_with_multiple_references_allowed_path, "r") as f:
+        return f.read()
+
+
 # DKPro types
 
 
diff --git a/tests/test_files/typesystems/typesystem_with_multiple_references_allowed.xml b/tests/test_files/typesystems/typesystem_with_multiple_references_allowed.xml
new file mode 100644
index 0000000..530a0f3
--- /dev/null
+++ b/tests/test_files/typesystems/typesystem_with_multiple_references_allowed.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>test.type</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>target</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.StringArray</rangeTypeName>
+                    <elementType>uima.cas.String</elementType>
+                    <multipleReferencesAllowed>true</multipleReferencesAllowed>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi
new file mode 100644
index 0000000..41577b0
--- /dev/null
+++ b/tests/test_files/xmi/cas_with_multiple_references_allowed_string_array.xmi
@@ -0,0 +1,22 @@
+<?xml version="1.0" ?>
+<xmi:XMI xmi:version="2.0" xmlns:cas="http:///uima/cas.ecore"
+		 xmlns:tcas="http:///uima/tcas.ecore"
+		 xmlns:xmi="http://www.omg.org/XMI"
+		 xmlns:test="http:///test.ecore">
+
+	<cas:NULL xmi:id="0"/>
+
+    <tcas:DocumentAnnotation xmi:id="1" sofa="1" begin="0" end="47" language="x-unspecified"/>
+
+    <test:type xmi:id="2" sofa="1" target="3"/>
+
+	<cas:StringArray xmi:id="3">
+		<elements>LNC</elements>
+		<elements>MTH</elements>
+		<elements>SNOMEDCT_US</elements>
+	</cas:StringArray>
+
+	<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text/plain"
+              sofaString="Joe waited for the train . The train was late ."/>
+	<cas:View members="1 2" sofa="1"/>
+</xmi:XMI>
diff --git a/tests/test_xmi.py b/tests/test_xmi.py
index 49b5875..39ed338 100644
--- a/tests/test_xmi.py
+++ b/tests/test_xmi.py
@@ -28,6 +28,10 @@
         pytest.lazy_fixture("cas_has_fs_with_no_namespace_xmi"),
         pytest.lazy_fixture("typesystem_has_types_with_no_namespace_xml"),
     ),
+    (
+        pytest.lazy_fixture("cas_with_multiple_references_allowed_string_array_xmi"),
+        pytest.lazy_fixture("typesystem_with_multiple_references_allowed_xml"),
+    ),
 ]
 
 
@@ -294,14 +298,14 @@ def test_offsets_work_for_empty_sofastring():
 # Leniency
 
 
-def test_leniency_type_not_in_typeystem_lenient(cas_with_leniency_xmi, small_typesystem_xml):
+def test_leniency_type_not_in_typesystem_lenient(cas_with_leniency_xmi, small_typesystem_xml):
     typesystem = load_typesystem(small_typesystem_xml)
 
     with pytest.warns(UserWarning):
         cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True)
 
 
-def test_leniency_type_not_in_typeystem_not_lenient(cas_with_leniency_xmi, small_typesystem_xml):
+def test_leniency_type_not_in_typesystem_not_lenient(cas_with_leniency_xmi, small_typesystem_xml):
     typesystem = load_typesystem(small_typesystem_xml)
 
     with pytest.raises(TypeNotFoundError):

From fbcda8e46c75d0bbb7fd9e1455e43b37321f238c Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Fri, 10 Sep 2021 15:43:46 +0200
Subject: [PATCH 17/22] #168 - Experimental JSON CAS support

- Better check whether adding a TextIOWrapper is necessary during serialization
- Fixed bad access to element type name
- Formatting
---
 cassis/json.py       |  6 +++---
 tests/performance.py | 20 ++++++++++++++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/cassis/json.py b/cassis/json.py
index d4e2fee..370188f 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -1,7 +1,7 @@
 import base64
 import json
 from collections import OrderedDict
-from io import TextIOWrapper
+from io import TextIOBase, TextIOWrapper
 
 from cassis.cas import NAME_DEFAULT_SOFA, Cas, IdGenerator, Sofa, View
 from cassis.typesystem import *
@@ -268,7 +268,7 @@ def serialize(
             json_fs = self._serialize_feature_structure(fs)
             feature_structures.append(json_fs)
 
-        if isinstance(sink, BytesIO):
+        if not isinstance(sink, TextIOBase):
             sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
 
         if sink:
@@ -316,7 +316,7 @@ def _serialize_feature(self, json_type, feature: Feature):
             json_feature[MULTIPLE_REFERENCES_ALLOWED_FIELD] = feature.multipleReferencesAllowed
 
         if feature.elementType is not None:
-            json_feature[ELEMENT_TYPE_FIELD] = self._to_external_type_name(feature.elementType)
+            json_feature[ELEMENT_TYPE_FIELD] = self._to_external_type_name(feature.elementType.name)
 
         return json_feature
 
diff --git a/tests/performance.py b/tests/performance.py
index aaff08a..69575de 100644
--- a/tests/performance.py
+++ b/tests/performance.py
@@ -13,8 +13,12 @@
 
 typesystem = generator.generate_type_system()
 randomized_cas = generator.generate_cas(typesystem)
+
 randomized_cas_xmi = randomized_cas.to_xmi()
+randomized_cas_xmi_bytes = randomized_cas_xmi.encode("utf-8")
+
 randomized_cas_json = randomized_cas.to_json()
+randomized_cas_json_bytes = randomized_cas_json.encode("utf-8")
 
 
 @pytest.mark.performance
@@ -24,7 +28,9 @@ def test_xmi_serialization_performance():
         randomized_cas.to_xmi()
     end = timer()
 
-    print(f"XMI: Serializing {iterations} CASes took {end - start} seconds")
+    print(
+        f"XMI: Serializing {iterations} CASes with {generator.size} each took {end - start} seconds ({len(randomized_cas_xmi_bytes)} bytes each)"
+    )
 
 
 @pytest.mark.performance
@@ -34,7 +40,9 @@ def test_json_serialization_performance():
         randomized_cas.to_json()
     end = timer()
 
-    print(f"JSON: Serializing {iterations} CASes took {end - start} seconds")
+    print(
+        f"JSON: Serializing {iterations} CASes with {generator.size} each took {end - start} seconds ({len(randomized_cas_json_bytes)} bytes each)"
+    )
 
 
 @pytest.mark.performance
@@ -44,7 +52,9 @@ def test_xmi_deserialization_performance():
         load_cas_from_xmi(randomized_cas_xmi, typesystem)
     end = timer()
 
-    print(f"XMI: Deserializing {iterations} CASes took {end - start} seconds")
+    print(
+        f"XMI: Deserializing {iterations} CASes with {generator.size} each took {end - start} seconds ({len(randomized_cas_xmi_bytes)} bytes each)"
+    )
 
 
 @pytest.mark.performance
@@ -54,4 +64,6 @@ def test_json_deserialization_performance():
         load_cas_from_json(randomized_cas_json, typesystem)
     end = timer()
 
-    print(f"JSON: Deserializing {iterations} CASes took {end - start} seconds")
+    print(
+        f"JSON: Deserializing {iterations} CASes with {generator.size} each took {end - start} seconds ({len(randomized_cas_json_bytes)} bytes each)"
+    )

From 1e97c37d270f1ee474f53a763176fa804f9abdb7 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Mon, 20 Sep 2021 17:17:57 +0200
Subject: [PATCH 18/22] #168 - Experimental JSON CAS support

- Better test if using a TextIOWrapper is really necessary
---
 cassis/json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cassis/json.py b/cassis/json.py
index 370188f..506df1b 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -268,7 +268,7 @@ def serialize(
             json_fs = self._serialize_feature_structure(fs)
             feature_structures.append(json_fs)
 
-        if not isinstance(sink, TextIOBase):
+        if sink and not isinstance(sink, TextIOBase):
             sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
 
         if sink:

From 555ed6fc6e08891b483d115bdc58b6429429eb21 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Mon, 20 Sep 2021 18:35:53 +0200
Subject: [PATCH 19/22] #168 - Experimental JSON CAS support

- Work around issues with cas_to_comparable_text and FSArrays
---
 cassis/util.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/cassis/util.py b/cassis/util.py
index 3bbac33..c54de57 100644
--- a/cassis/util.py
+++ b/cassis/util.py
@@ -7,7 +7,7 @@
 import attr
 
 from cassis import Cas
-from cassis.typesystem import FEATURE_BASE_NAME_SOFA, TYPE_NAME_ANNOTATION, FeatureStructure, Type
+from cassis.typesystem import FEATURE_BASE_NAME_SOFA, TYPE_NAME_ANNOTATION, FeatureStructure, Type, TypeSystem, is_array
 
 _EXCLUDED_FEATURES = {FEATURE_BASE_NAME_SOFA}
 _NULL_VALUE = "<NULL>"
@@ -21,7 +21,7 @@ def cas_to_comparable_text(
     covered_text: bool = True,
 ) -> [str, None]:
     indexed_feature_structures = _get_indexed_feature_structures(cas)
-    all_feature_structures_by_type = _group_feature_structures_by_type(cas._find_all_fs(seeds))
+    all_feature_structures_by_type = _group_feature_structures_by_type(cas._find_all_fs(seeds=seeds))
     types_sorted = sorted(all_feature_structures_by_type.keys())
     fs_id_to_anchor = _generate_anchors(
         cas, types_sorted, all_feature_structures_by_type, indexed_feature_structures, mark_indexed=mark_indexed
@@ -32,6 +32,11 @@ def cas_to_comparable_text(
 
     csv_writer = csv.writer(out, dialect=csv.unix_dialect)
     for t in types_sorted:
+        # FIXME This avoids problems with FSArrays which are indexed in a view - need to write a test case for
+        # FSArrays that are in the index and have as elements another set of FSArrays ...
+        if is_array(t):
+            continue
+
         type_ = cas.typesystem.get_type(t)
 
         csv_writer.writerow([type_.name])

From ceeabb74b63e2c4012abe8e1dea666370b71ec03 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Mon, 27 Sep 2021 17:29:52 +0200
Subject: [PATCH 20/22] #168 - Experimental JSON CAS support

- Support for floating point special values in JSON
- Support for not serializing the full type system in JSON but only the minimal or none at all
---
 cassis/cas.py                                 |  21 ++-
 cassis/json.py                                | 120 +++++++++++++++---
 cassis/typesystem.py                          |  46 ++++++-
 .../data.json                                 | 102 +++++++++++++++
 .../debug-typesystem.xml                      |  74 +++++++++++
 .../debug.xmi                                 |   7 +
 .../typesystem.xml                            |  74 +++++++++++
 tests/test_json.py                            |   1 +
 tests/test_typesystem.py                      |  33 +++++
 9 files changed, 453 insertions(+), 25 deletions(-)
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/data.json
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug-typesystem.xml
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug.xmi
 create mode 100644 tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/typesystem.xml

diff --git a/cassis/cas.py b/cassis/cas.py
index a65dcce..75d3af1 100644
--- a/cassis/cas.py
+++ b/cassis/cas.py
@@ -8,7 +8,7 @@
 from attr import validators
 from sortedcontainers import SortedKeyList
 
-from cassis.typesystem import TYPE_NAME_SOFA, FeatureStructure, TypeCheckError, TypeSystem
+from cassis.typesystem import TYPE_NAME_SOFA, FeatureStructure, TypeCheckError, TypeSystem, TypeSystemMode
 
 _validator_optional_string = validators.optional(validators.instance_of(str))
 
@@ -582,22 +582,33 @@ def to_xmi(self, path: Union[str, Path, None] = None, pretty_print: bool = False
         return self._serialize(CasXmiSerializer(), path, pretty_print=pretty_print)
 
     def to_json(
-        self, path: Union[str, Path, None] = None, pretty_print: bool = False, ensure_ascii=False
+        self,
+        path: Union[str, Path, None] = None,
+        pretty_print: bool = False,
+        ensure_ascii=False,
+        type_system_mode: TypeSystemMode = TypeSystemMode.FULL,
     ) -> Optional[str]:
         """Creates a JSON representation of this CAS.
 
         Args:
             path: File path, if `None` is provided the result is returned as a string
             pretty_print: `True` if the resulting JSON should be pretty-printed, else `False`
-
+            ensure_ascii: Whether to escape non-ASCII Unicode characters or not
+            type_system_mode: Whether to serialize the full type system (`FULL`), only the types used (`MINIMAL`), or no
+                              type system information at all (`NONE`)
 
         Returns:
             If `path` is None, then the JSON representation of this CAS is returned as a string
-
         """
         from cassis.json import CasJsonSerializer
 
-        return self._serialize(CasJsonSerializer(), path, pretty_print=pretty_print, ensure_ascii=ensure_ascii)
+        return self._serialize(
+            CasJsonSerializer(),
+            path,
+            pretty_print=pretty_print,
+            ensure_ascii=ensure_ascii,
+            type_system_mode=type_system_mode,
+        )
 
     def _serialize(self, serializer, path: Union[str, Path, None] = None, **kwargs):
         """Runs this CAS through the given serializer.
diff --git a/cassis/json.py b/cassis/json.py
index 506df1b..da9d66d 100644
--- a/cassis/json.py
+++ b/cassis/json.py
@@ -1,12 +1,17 @@
 import base64
 import json
+import math
 from collections import OrderedDict
 from io import TextIOBase, TextIOWrapper
+from math import isnan
 
 from cassis.cas import NAME_DEFAULT_SOFA, Cas, IdGenerator, Sofa, View
 from cassis.typesystem import *
 
 RESERVED_FIELD_PREFIX = "%"
+REF_FEATURE_PREFIX = "@"
+NUMBER_FEATURE_PREFIX = "#"
+ANCHOR_FEATURE_PREFIX = "^"
 TYPE_FIELD = RESERVED_FIELD_PREFIX + "TYPE"
 RANGE_FIELD = RESERVED_FIELD_PREFIX + "RANGE"
 TYPES_FIELD = RESERVED_FIELD_PREFIX + "TYPES"
@@ -15,7 +20,6 @@
 VIEW_SOFA_FIELD = RESERVED_FIELD_PREFIX + "SOFA"
 VIEW_MEMBERS_FIELD = RESERVED_FIELD_PREFIX + "MEMBERS"
 FEATURE_STRUCTURES_FIELD = RESERVED_FIELD_PREFIX + "FEATURE_STRUCTURES"
-REF_FEATURE_PREFIX = "@"
 NAME_FIELD = RESERVED_FIELD_PREFIX + "NAME"
 SUPER_TYPE_FIELD = RESERVED_FIELD_PREFIX + "SUPER_TYPE"
 DESCRIPTION_FIELD = RESERVED_FIELD_PREFIX + "DESCRIPTION"
@@ -26,6 +30,11 @@
 FLAG_DOCUMENT_ANNOTATION = "DocumentAnnotation"
 ARRAY_SUFFIX = "[]"
 ELEMENTS_FIELD = RESERVED_FIELD_PREFIX + "ELEMENTS"
+NAN_VALUE = "NaN"
+POSITIVE_INFINITE_VALUE = "Infinity"
+POSITIVE_INFINITE_VALUE_ABBR = "Inf"
+NEGATIVE_INFINITE_VALUE = "-Infinity"
+NEGATIVE_INFINITE_VALUE_ABBR = "-Inf"
 
 
 def load_cas_from_json(source: Union[IO, str], typesystem: TypeSystem = None) -> Cas:
@@ -193,6 +202,9 @@ def fix_up(elements):
             if key.startswith(REF_FEATURE_PREFIX):
                 ref_features[key[1:]] = value
                 attributes.pop(key)
+            if key.startswith(NUMBER_FEATURE_PREFIX):
+                attributes[key[1:]] = self._parse_float_value(value)
+                attributes.pop(key)
 
         self._max_xmi_id = max(attributes["xmiID"], self._max_xmi_id)
         fs = AnnotationType(**attributes)
@@ -200,16 +212,34 @@ def fix_up(elements):
         self._resolve_references(fs, ref_features, feature_structures)
 
         # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
-        if typesystem.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION):
+        if typesystem.is_instance_of(fs.type, TYPE_NAME_ANNOTATION):
             sofa = fs.sofa
             fs.begin = sofa._offset_converter.uima_to_cassis(fs.begin)
             fs.end = sofa._offset_converter.uima_to_cassis(fs.end)
 
         return fs
 
+    def _parse_float_value(self, value: Union[str, float]) -> float:
+        if isinstance(value, float):
+            return value
+        elif value == NAN_VALUE:
+            return float("nan")
+        elif value == POSITIVE_INFINITE_VALUE or value == POSITIVE_INFINITE_VALUE_ABBR:
+            return float("inf")
+        elif value == NEGATIVE_INFINITE_VALUE or value == NEGATIVE_INFINITE_VALUE_ABBR:
+            return float("-inf")
+
+        raise ValueError(
+            f"Illegal floating point value [{value}]. Must be a float literal or one of {NAN_VALUE}, "
+            f"{POSITIVE_INFINITE_VALUE}, {POSITIVE_INFINITE_VALUE_ABBR}, {NEGATIVE_INFINITE_VALUE}, or "
+            f"{NEGATIVE_INFINITE_VALUE_ABBR}"
+        )
+
     def _parse_primitive_array(self, type_name: str, elements: [list, str]) -> List:
-        if type_name == TYPE_NAME_BYTE_ARRAY:
+        if elements and type_name == TYPE_NAME_BYTE_ARRAY:
             return base64.b64decode(elements)
+        if elements and (type_name == TYPE_NAME_FLOAT_ARRAY or type_name == TYPE_NAME_DOUBLE_ARRAY):
+            return [self._parse_float_value(v) for v in elements]
         else:
             return elements
 
@@ -242,21 +272,19 @@ def __init__(self):
         pass
 
     def serialize(
-        self, sink: Union[IO, str, None], cas: Cas, pretty_print: bool = True, ensure_ascii: bool = False
+        self,
+        sink: Union[IO, str, None],
+        cas: Cas,
+        pretty_print: bool = True,
+        ensure_ascii: bool = False,
+        type_system_mode: TypeSystemMode = TypeSystemMode.FULL,
     ) -> Union[str, None]:
-        data = {}
-        types = data[TYPES_FIELD] = {}
-        views = data[VIEWS_FIELD] = {}
-        feature_structures = data[FEATURE_STRUCTURES_FIELD] = []
-
-        for type_ in cas.typesystem.get_types():
-            if type_.name == TYPE_NAME_DOCUMENT_ANNOTATION:
-                continue
-            json_type = self._serialize_type(type_)
-            types[json_type[NAME_FIELD]] = json_type
+        feature_structures = []
 
+        views = {}
         for view in cas.views:
             views[view.sofa.sofaID] = self._serialize_view(view)
+
             if view.sofa.sofaArray:
                 json_sofa_array_fs = self._serialize_feature_structure(view.sofa.sofaArray)
                 feature_structures.append(json_sofa_array_fs)
@@ -264,17 +292,52 @@ def serialize(
             feature_structures.append(json_sofa_fs)
 
         # Find all fs, even the ones that are not directly added to a sofa
+        used_types = set()
         for fs in sorted(cas._find_all_fs(include_inlinable_arrays=True), key=lambda a: a.xmiID):
+            used_types.add(fs.type)
             json_fs = self._serialize_feature_structure(fs)
             feature_structures.append(json_fs)
 
+        types = None
+        if type_system_mode is not TypeSystemMode.NONE:
+            types = {}
+
+            if type_system_mode is TypeSystemMode.MINIMAL:
+                # Build transitive closure of used types by following parents, features, etc.
+                types_to_include = cas.typesystem.transitive_closure(used_types)
+            elif type_system_mode is TypeSystemMode.FULL:
+                types_to_include = cas.typesystem.get_types()
+
+            for type_ in types_to_include:
+                if type_.name == TYPE_NAME_DOCUMENT_ANNOTATION:
+                    continue
+                json_type = self._serialize_type(type_)
+                types[json_type[NAME_FIELD]] = json_type
+
+        data = {}
+        if types is not None:
+            data[TYPES_FIELD] = types
+        if feature_structures is not None:
+            data[FEATURE_STRUCTURES_FIELD] = feature_structures
+        if views is not None:
+            data[VIEWS_FIELD] = views
+
         if sink and not isinstance(sink, TextIOBase):
             sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
 
         if sink:
-            json.dump(data, sink, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii)
+            json.dump(
+                data,
+                sink,
+                sort_keys=False,
+                indent=2 if pretty_print else None,
+                ensure_ascii=ensure_ascii,
+                allow_nan=False,
+            )
         else:
-            return json.dumps(data, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii)
+            return json.dumps(
+                data, sort_keys=False, indent=2 if pretty_print else None, ensure_ascii=ensure_ascii, allow_nan=False
+            )
 
         if isinstance(sink, TextIOWrapper):
             sink.detach()  # Prevent TextIOWrapper from closing the BytesIO
@@ -288,9 +351,11 @@ def _serialize_type(self, type_: Type):
         json_type = {
             NAME_FIELD: type_name,
             SUPER_TYPE_FIELD: supertype_name,
-            DESCRIPTION_FIELD: type_.description,
         }
 
+        if type_.description:
+            json_type[DESCRIPTION_FIELD] = type_.description
+
         for feature in list(type_.features):
             json_feature = self._serialize_feature(json_type, feature)
             json_type[json_feature[NAME_FIELD]] = json_feature
@@ -331,6 +396,10 @@ def _serialize_feature_structure(self, fs) -> dict:
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = base64.b64encode(bytes(fs.elements)).decode("ascii")
             return json_fs
+        elif type_name in {TYPE_NAME_DOUBLE_ARRAY, TYPE_NAME_FLOAT_ARRAY}:
+            if fs.elements:
+                json_fs[ELEMENTS_FIELD] = [self._serialize_float_value(e) for e in fs.elements]
+            return json_fs
         elif is_primitive_array(fs.type):
             if fs.elements:
                 json_fs[ELEMENTS_FIELD] = fs.elements
@@ -360,13 +429,28 @@ def _serialize_feature_structure(self, fs) -> dict:
                 sofa: Sofa = getattr(fs, "sofa")
                 value = sofa._offset_converter.cassis_to_uima(value)
 
-            if is_primitive(feature.rangeType):
+            if feature.rangeType.name in {TYPE_NAME_DOUBLE, TYPE_NAME_FLOAT}:
+                float_value = self._serialize_float_value(value)
+                if isinstance(float_value, str):
+                    feature_name = NUMBER_FEATURE_PREFIX + feature_name
+                json_fs[feature_name] = self._serialize_float_value(value)
+            elif is_primitive(feature.rangeType):
                 json_fs[feature_name] = value
             else:
                 # We need to encode non-primitive features as a reference
                 json_fs[REF_FEATURE_PREFIX + feature_name] = self._serialize_ref(value)
         return json_fs
 
+    def _serialize_float_value(self, value) -> Union[float, str]:
+        if isnan(value):
+            return NAN_VALUE
+        elif math.isinf(value):
+            if value > 0:
+                return POSITIVE_INFINITE_VALUE
+            else:
+                return NEGATIVE_INFINITE_VALUE
+        return value
+
     def _serialize_ref(self, fs) -> int:
         if not fs:
             return None
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
index 026daeb..c9e3244 100644
--- a/cassis/typesystem.py
+++ b/cassis/typesystem.py
@@ -1,10 +1,11 @@
 import re
 import warnings
 from collections import defaultdict
+from enum import Enum, auto
 from io import BytesIO
 from itertools import chain, filterfalse
 from pathlib import Path
-from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Union
+from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Set, Union
 
 import attr
 import deprecation
@@ -172,6 +173,14 @@
 _ARRAY_TYPES = _PRIMITIVE_ARRAY_TYPES | {"uima.cas.FSArray"}
 
 
+class TypeSystemMode(Enum):
+    """How much type system information to include."""
+
+    FULL = auto()
+    MINIMAL = auto()
+    NONE = auto()
+
+
 def _string_to_valid_classname(name: str):
     return re.sub("[^a-zA-Z0-9_]", "_", name)
 
@@ -402,7 +411,7 @@ def __lt__(self, other):
         return self.name < other.name
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, hash=False, eq=True)
 class Type:
     """Describes types in a type system.
 
@@ -584,6 +593,12 @@ def subsumes(self, other_type: "Type") -> bool:
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
+    def __eq__(self, other):
+        return self.name == other.name
+
 
 class TypeSystem:
     def __init__(self, add_document_annotation_type: bool = True):
@@ -967,6 +982,33 @@ def _add_document_annotation_type(self):
         t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName="uima.tcas.Annotation")
         self.create_feature(t, name="language", rangeType="uima.cas.String")
 
+    def transitive_closure(self, seed_types: Set[Type], built_in: bool = False) -> Set[Type]:
+        # Build transitive closure of used types by following parents, features, etc.
+        transitively_referenced_types = set()
+        openlist = []
+        openlist.extend(seed_types)
+        while openlist:
+            type_ = openlist.pop(0)
+
+            if type_ in transitively_referenced_types:
+                continue
+
+            if not built_in and type_.name in _PREDEFINED_TYPES:
+                continue
+
+            transitively_referenced_types.add(type_)
+
+            if type_.supertype and type_.supertype not in transitively_referenced_types:
+                openlist.append(type_.supertype)
+
+            for feature in type_.all_features:
+                if feature.rangeType not in transitively_referenced_types:
+                    openlist.append(feature.rangeType)
+                if feature.elementType and feature.elementType not in transitively_referenced_types:
+                    openlist.append(feature.elementType)
+
+        return transitively_referenced_types
+
 
 # Deserializing
 
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/data.json b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/data.json
new file mode 100644
index 0000000..0d97fb3
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/data.json
@@ -0,0 +1,102 @@
+{
+  "%FEATURE_STRUCTURES": [
+    {
+      "%ID": 1,
+      "%TYPE": "uima.cas.Sofa",
+      "sofaID": "_InitialView",
+      "sofaNum": 1
+    },
+    {
+      "#doubleNan": "NaN",
+      "#doubleNegInfinity": "-Infinity",
+      "#doublePosInfinity": "Infinity",
+      "#floatNan": "NaN",
+      "#floatNegInfinity": "-Infinity",
+      "#floatPosInfinity": "Infinity",
+      "%ID": 1,
+      "%TYPE": "SpecialValuesType",
+      "doubleOne": 1.0,
+      "doubleZero": 0.0,
+      "floatOne": 1.0,
+      "floatZero": 0.0
+    },
+    {
+      "%ELEMENTS": [
+        0.0,
+        1.0,
+        "-Infinity",
+        "Infinity",
+        "NaN"
+      ],
+      "%ID": 2,
+      "%TYPE": "uima.cas.DoubleArray"
+    },
+    {
+      "%ELEMENTS": [
+        0.0,
+        1.0,
+        "-Infinity",
+        "Infinity",
+        "NaN"
+      ],
+      "%ID": 3,
+      "%TYPE": "uima.cas.FloatArray"
+    }
+  ],
+  "%TYPES": {
+    "SpecialValuesType": {
+      "%NAME": "SpecialValuesType",
+      "%SUPER_TYPE": "uima.cas.TOP",
+      "doubleNan": {
+        "%NAME": "doubleNan",
+        "%RANGE": "uima.cas.Double"
+      },
+      "doubleNegInfinity": {
+        "%NAME": "doubleNegInfinity",
+        "%RANGE": "uima.cas.Double"
+      },
+      "doubleOne": {
+        "%NAME": "doubleOne",
+        "%RANGE": "uima.cas.Double"
+      },
+      "doublePosInfinity": {
+        "%NAME": "doublePosInfinity",
+        "%RANGE": "uima.cas.Double"
+      },
+      "doubleZero": {
+        "%NAME": "doubleZero",
+        "%RANGE": "uima.cas.Double"
+      },
+      "floatNan": {
+        "%NAME": "floatNan",
+        "%RANGE": "uima.cas.Float"
+      },
+      "floatNegInfinity": {
+        "%NAME": "floatNegInfinity",
+        "%RANGE": "uima.cas.Float"
+      },
+      "floatOne": {
+        "%NAME": "floatOne",
+        "%RANGE": "uima.cas.Float"
+      },
+      "floatPosInfinity": {
+        "%NAME": "floatPosInfinity",
+        "%RANGE": "uima.cas.Float"
+      },
+      "floatZero": {
+        "%NAME": "floatZero",
+        "%RANGE": "uima.cas.Float"
+      }
+    }
+  },
+  "%VIEWS": {
+    "_InitialView": {
+      "%MEMBERS": [
+        1,
+        2,
+        3
+      ],
+      "%SOFA": 1
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug-typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug-typesystem.xml
new file mode 100644
index 0000000..9a8766d
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug-typesystem.xml
@@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+        <typeDescription>
+            <name>SpecialValuesType</name>
+            <description/>
+            <supertypeName>uima.cas.TOP</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>doubleZero</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doubleOne</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doublePosInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doubleNegInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doubleNan</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatZero</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatOne</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatPosInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatNegInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatNan</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug.xmi b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug.xmi
new file mode 100644
index 0000000..e02d4cb
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/debug.xmi
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:noNamespace="http:///uima/noNamespace.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
+    <cas:NULL xmi:id="0"/>
+    <noNamespace:SpecialValuesType xmi:id="1" doubleZero="0.0" doubleOne="1.0" doublePosInfinity="Infinity" doubleNegInfinity="-Infinity" doubleNan="NaN" floatZero="0.0" floatOne="1.0" floatPosInfinity="Infinity" floatNegInfinity="-Infinity" floatNan="NaN"/>
+    <cas:DoubleArray xmi:id="2" elements="0.0 1.0 -Infinity Infinity NaN"/>
+    <cas:FloatArray xmi:id="3" elements="0.0 1.0 -Infinity Infinity NaN"/>
+    <cas:View members="1 2 3"/>
+</xmi:XMI>
diff --git a/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/typesystem.xml b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/typesystem.xml
new file mode 100644
index 0000000..9a8766d
--- /dev/null
+++ b/tests/test_files/json/fs_as_array/ser-ref/casWithFloatingPointSpecialValues/typesystem.xml
@@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <types>
+        <typeDescription>
+            <name>uima.tcas.DocumentAnnotation</name>
+            <description/>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>language</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.String</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+        <typeDescription>
+            <name>SpecialValuesType</name>
+            <description/>
+            <supertypeName>uima.cas.TOP</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>doubleZero</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doubleOne</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doublePosInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doubleNegInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>doubleNan</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Double</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatZero</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatOne</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatPosInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatNegInfinity</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+                <featureDescription>
+                    <name>floatNan</name>
+                    <description/>
+                    <rangeTypeName>uima.cas.Float</rangeTypeName>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
diff --git a/tests/test_json.py b/tests/test_json.py
index cc02c56..3889b11 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -10,6 +10,7 @@
 FIXTURES = [
     (os.path.join(FIXTURE_DIR, "casWithSofaDataArray"), []),
     (os.path.join(FIXTURE_DIR, "casWithSofaDataURI"), []),
+    (os.path.join(FIXTURE_DIR, "casWithFloatingPointSpecialValues"), []),
     (os.path.join(FIXTURE_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
     (
         os.path.join(FIXTURE_DIR, "casWithoutTextButWithAnnotations"),
diff --git a/tests/test_typesystem.py b/tests/test_typesystem.py
index 82f8264..7d3a70a 100644
--- a/tests/test_typesystem.py
+++ b/tests/test_typesystem.py
@@ -7,9 +7,14 @@
 from cassis.typesystem import (
     _COLLECTION_TYPES,
     TOP_TYPE_NAME,
+    TYPE_NAME_ANNOTATION,
+    TYPE_NAME_ANNOTATION_BASE,
+    TYPE_NAME_ARRAY_BASE,
     TYPE_NAME_BOOLEAN,
     TYPE_NAME_INTEGER,
+    TYPE_NAME_SOFA,
     TYPE_NAME_STRING,
+    TYPE_NAME_STRING_ARRAY,
     TYPE_NAME_TOP,
     Feature,
     TypeCheckError,
@@ -861,3 +866,31 @@ def test_create_same_type_twice_fails():
     typesystem.create_type("my.Type")
     with pytest.raises(ValueError):
         typesystem.create_type("my.Type")
+
+
+def test_transitive_closure():
+    """Built-in types are part of the transitive closure only when explicitly requested."""
+    ts = TypeSystem()
+    base = ts.create_type("BaseType", supertypeName=TYPE_NAME_ANNOTATION)
+    child = ts.create_type("ChildType", supertypeName="BaseType")
+    ts.create_feature("ChildType", "primitiveFeature", TYPE_NAME_STRING)
+    ts.create_feature("ChildType", "arrayFeature", TYPE_NAME_STRING_ARRAY, elementType=TYPE_NAME_STRING)
+    ts.create_feature("ChildType", "fsFeature", "BaseType")
+
+    # Built-ins excluded: only the two user-defined types are expected
+    assert ts.transitive_closure({child}, built_in=False) == {base, child}
+
+    # Built-ins included: the expected set additionally holds these built-in types
+    closure = ts.transitive_closure({child}, built_in=True)
+    built_in_names = [
+        TYPE_NAME_TOP,
+        TYPE_NAME_ANNOTATION_BASE,
+        TYPE_NAME_ANNOTATION,
+        TYPE_NAME_STRING,
+        TYPE_NAME_ARRAY_BASE,
+        TYPE_NAME_STRING_ARRAY,
+        TYPE_NAME_INTEGER,
+        TYPE_NAME_SOFA,
+    ]
+    expected = {base, child} | {ts.get_type(name) for name in built_in_names}
+    assert closure == expected

From 03b14afc4101c233b8e1be3da8170b25c83b07eb Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Wed, 29 Sep 2021 11:17:04 +0200
Subject: [PATCH 21/22] #192 - Cleanup stuff

- Run pyupgrade
---
 tests/test_typesystem.py | 8 ++++----
 tests/test_xmi.py        | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_typesystem.py b/tests/test_typesystem.py
index 96746d5..e2b9df2 100644
--- a/tests/test_typesystem.py
+++ b/tests/test_typesystem.py
@@ -292,7 +292,7 @@ def test_is_instance_of(child_name: str, parent_name: str, expected: bool):
     # manually load the type system
     path = os.path.join(FIXTURE_DIR, "typesystems", "important_dkpro_types.xml")
 
-    with open(path, "r") as f:
+    with open(path) as f:
         ts = load_typesystem(f.read())
 
     assert ts.is_instance_of(child_name, parent_name) == expected
@@ -648,7 +648,7 @@ def test_that_typesystem_with_redefined_documentation_annotation_works(
     ],
 )
 def test_that_merging_compatible_typesystem_works(name, rangeTypeName, elementType, multipleReferencesAllowed):
-    with open(typesystem_merge_base_path(), "r") as f:
+    with open(typesystem_merge_base_path()) as f:
         base = load_typesystem(f.read())
 
     ts = TypeSystem()
@@ -682,7 +682,7 @@ def test_that_merging_compatible_typesystem_works(name, rangeTypeName, elementTy
     ],
 )
 def test_that_merging_incompatible_typesystem_throws(name, rangeTypeName, elementType, multipleReferencesAllowed):
-    with open(typesystem_merge_base_path(), "r") as f:
+    with open(typesystem_merge_base_path()) as f:
         base = load_typesystem(f.read())
 
     ts = TypeSystem()
@@ -697,7 +697,7 @@ def test_that_merging_incompatible_typesystem_throws(name, rangeTypeName, elemen
 
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=UserWarning)
-        with pytest.raises(ValueError, match=r".*\[{0}\].*".format(name)):
+        with pytest.raises(ValueError, match=fr".*\[{name}\].*"):
             merge_typesystems(base, ts)
 
 
diff --git a/tests/test_xmi.py b/tests/test_xmi.py
index 17c7e6e..5c5de01 100644
--- a/tests/test_xmi.py
+++ b/tests/test_xmi.py
@@ -181,7 +181,7 @@ def test_serializing_cas_to_file_path(tmpdir, xmi, typesystem_xml):
 
     cas.to_xmi(path)
 
-    with open(path, "r") as actual:
+    with open(path) as actual:
         assert_xml_equal(actual.read(), xmi)
 
 

From e89ada4bf40d09be22072d48e256adb31fb01bb1 Mon Sep 17 00:00:00 2001
From: Richard Eckart de Castilho <richard.eckart@gmail.com>
Date: Sun, 12 Dec 2021 21:37:17 +0100
Subject: [PATCH 22/22] #168 - Experimental JSON CAS support

- Added mention about non-final status in README file
---
 README.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.rst b/README.rst
index aac0fe2..cbb308d 100644
--- a/README.rst
+++ b/README.rst
@@ -59,6 +59,7 @@ Some features are still under development, e.g.
 
 - Proper type checking
 - XML/XMI schema validation
+- UIMA JSON CAS support (the format is not yet finalized)
 
 Installation
 ------------