Support Python 3.11 #7

Merged · 23 commits from gh/3.11 into trunk · May 11, 2023

Commits
c69e9b5  Pin transient urllib3 dependency to unbreak requests==2.30 (gordonhart, May 9, 2023)
01d67c6  Pin to requests-toolbelt>=1 instead of pinning transient urllib3 (gordonhart, May 9, 2023)
0094a1f  Add back importlib-metadata dependency (gordonhart, May 9, 2023)
900513d  Allow Python 3.11 and add version to CI matrix (gordonhart, May 9, 2023)
cac7341  Pin urllib3 per requests issue recommendation, leave TODO (gordonhart, May 9, 2023)
9e7aced  Pin transient urllib3 dependency to unbreak requests==2.30 (gordonhart, May 9, 2023)
5c94727  Pin to requests-toolbelt>=1 instead of pinning transient urllib3 (gordonhart, May 9, 2023)
920ee06  Add back importlib-metadata dependency (gordonhart, May 9, 2023)
4b821d7  Merge branch 'trunk' into gh/pin-urllib3 (gordonhart, May 9, 2023)
6093dce  Merge branch 'gh/pin-urllib3' into gh/3.11 (gordonhart, May 9, 2023)
3e9baba  Tediously update all enum usages to use Enum.KEY.value instead of Enum.KEY (gordonhart, May 10, 2023)
3605ad4  Move FR-specific upload_image_chips to kolena.fr._utils; remove kolen… (gordonhart, May 10, 2023)
2555b09  Revert {_=>}BatchedLoader diff, this PR is not the place for that (gordonhart, May 10, 2023)
ddda9f8  Merge branch 'trunk' into gh/3.11 (gordonhart, May 10, 2023)
9aef8e8  Start updating integration tests with new description edit semantics (gordonhart, May 10, 2023)
80faca6  Add missing pytest-depends dev dependency (gordonhart, May 10, 2023)
53c6a20  Fix FR test__edit__reset (gordonhart, May 10, 2023)
d682515  Assert description update without version update (gordonhart, May 10, 2023)
643dc5a  Merge branch 'trunk' into gh/3.11 (gordonhart, May 11, 2023)
868464c  Unbreak upload_image_chips after move (gordonhart, May 11, 2023)
d00ef3b  Unbreak FR test suite test__load by comparing data to data (gordonhart, May 11, 2023)
23ffad5  Remove bad comparison (gordonhart, May 11, 2023)
c04c4ed  Merge branch 'trunk' into gh/3.11 (gordonhart, May 11, 2023)
Files changed
8 changes: 5 additions & 3 deletions .circleci/config.yml
@@ -33,11 +33,13 @@ jobs:
             - /home/circleci/project/.poetry/virtualenvs
             - poetry.lock
       - run: poetry run python3 -c 'import kolena'
+      # TODO: fix underlying mypy issues with Python>3.9 rather than skipping
       - when:
           condition:
             not:
-              # TODO: upgrade mypy version to address https://github.com/python/mypy/issues/13627
-              equal: [ "3.10", << parameters.python-version >> ]
+              or:
+                - equal: [ "3.10", << parameters.python-version >> ]
+                - equal: [ "3.11", << parameters.python-version >> ]
           steps:
             - run: poetry run pre-commit run -a
             - run:
@@ -108,7 +110,7 @@ workflows:
           name: unit-test-<< matrix.python-version >>
           matrix:
             parameters:
-              python-version: [ "3.7", "3.8", "3.9", "3.10" ]
+              python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
       - integration-test:
           matrix:
             parameters:
4 changes: 2 additions & 2 deletions kolena/_api/v1/batched_load.py
@@ -25,11 +25,11 @@ class Path(str, Enum):
 
     @classmethod
     def upload_signed_url(cls, load_uuid: str) -> str:
-        return f"{cls.UPLOAD_SIGNED_URL_STUB}/{load_uuid}"
+        return f"{cls.UPLOAD_SIGNED_URL_STUB.value}/{load_uuid}"
 
     @classmethod
     def download_by_path(cls, path: str) -> str:
-        return f"{cls.DOWNLOAD_BY_PATH_STUB}/{path}"
+        return f"{cls.DOWNLOAD_BY_PATH_STUB.value}/{path}"
 
 @dataclass(frozen=True)
 class WithLoadUUID:
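
The .value additions here (and throughout this PR) work around a behavior change in Python 3.11: for enums with a mixed-in type such as str, format() and f-strings now render the member via Enum.__str__ (the member name) instead of deferring to the mixin. A minimal sketch of the difference, using a hypothetical stub value since the real one is not shown in this diff:

from enum import Enum

class Path(str, Enum):
    UPLOAD_SIGNED_URL_STUB = "/upload-signed-url"  # hypothetical value for illustration

print(f"{Path.UPLOAD_SIGNED_URL_STUB}/abc123")
# Python 3.10 and earlier: /upload-signed-url/abc123
# Python 3.11:             Path.UPLOAD_SIGNED_URL_STUB/abc123

print(f"{Path.UPLOAD_SIGNED_URL_STUB.value}/abc123")
# Every version: /upload-signed-url/abc123
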
19 changes: 0 additions & 19 deletions kolena/_api/v1/samples.py

This file was deleted.

3 changes: 2 additions & 1 deletion kolena/_utils/asset_path_mapper.py
@@ -30,7 +30,8 @@ def absolute_locator(self, test_run_id: int, load_uuid: str, image_id: int, key:
     def relative_locator(self, path_stub: str) -> str:
         return f"{self.prefix}/{path_stub}"
 
-    def path_stub(self, test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
+    @staticmethod
+    def path_stub(test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
         return f"{test_run_id}/{image_id}/{key}-{load_uuid}.png"
 
     def _absolute_locator(self, relative_locator: str) -> str:
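
Making path_stub a @staticmethod matches its body, which reads no instance state, so it can now be called without constructing an AssetPathMapper. A hypothetical call site (argument values invented for illustration):

stub = AssetPathMapper.path_stub(test_run_id=1, load_uuid="a1b2", image_id=42, key="chip")
assert stub == "1/42/chip-a1b2.png"
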
50 changes: 2 additions & 48 deletions kolena/_utils/batched_load.py
@@ -26,26 +26,20 @@
 import numpy as np
 import pandas as pd
 import requests
-from PIL import Image
-from requests_toolbelt import MultipartEncoder
 from retrying import retry
 
 from kolena._api.v1.batched_load import BatchedLoad as API
-from kolena._api.v1.fr import Asset as AssetAPI
 from kolena._utils import krequests
 from kolena._utils import log
-from kolena._utils.asset_path_mapper import AssetPathMapper
 from kolena._utils.datatypes import LoadableDataFrame
 from kolena._utils.serde import from_dict
-from kolena.fr._consts import _BatchSize
-from kolena.fr.datatypes import _ImageChipsDataFrame
 
 VALIDATION_COUNT_LIMIT = 100
 STAGE_STATUS__LOADED = "LOADED"
 
 
 def init_upload() -> API.InitiateUploadResponse:
-    init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD)
+    init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD.value)
     krequests.raise_for_status(init_res)
     init_response = from_dict(data_class=API.InitiateUploadResponse, data=init_res.json())
     return init_response
@@ -78,46 +72,6 @@ def upload_data_frame_chunk(df_chunk: pd.DataFrame, load_uuid: str) -> None:
     krequests.raise_for_status(upload_response)
 
 
-def upload_image_chips(
-    df: _ImageChipsDataFrame,
-    path_mapper: AssetPathMapper,
-    batch_size: int = _BatchSize.UPLOAD_CHIPS,
-) -> None:
-    def upload_batch(df_batch: _ImageChipsDataFrame) -> None:
-        df_batch = df_batch.reset_index(drop=True)  # reset indices so we match the signed_url indices
-
-        def as_buffer(image_raw: np.ndarray) -> io.BytesIO:
-            pil_image = Image.fromarray(image_raw).convert("RGB")
-            buf = io.BytesIO()
-            pil_image.save(buf, "png")
-            buf.seek(0)
-            return buf
-
-        data = MultipartEncoder(
-            fields=[
-                (
-                    "files",
-                    (
-                        path_mapper.path_stub(row["test_run_id"], row["uuid"], row["image_id"], row["key"]),
-                        as_buffer(row["image"]),
-                    ),
-                )
-                for _, row in df_batch.iterrows()
-            ],
-        )
-        upload_response = krequests.put(
-            endpoint_path=AssetAPI.Path.BULK_UPLOAD,
-            data=data,
-            headers={"Content-Type": data.content_type},
-        )
-        krequests.raise_for_status(upload_response)
-
-    num_chunks = math.ceil(len(df) / batch_size)
-    chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else []
-    for df_chunk in chunk_iter:
-        upload_batch(df_chunk)
-
-
 DFType = TypeVar("DFType", bound=LoadableDataFrame)
 
 
@@ -157,7 +111,7 @@ def complete_load(uuid: Optional[str]) -> None:
         return
     complete_request = API.CompleteDownloadRequest(uuid=uuid)
     complete_res = krequests.put(
-        endpoint_path=API.Path.COMPLETE_DOWNLOAD,
+        endpoint_path=API.Path.COMPLETE_DOWNLOAD.value,
         data=json.dumps(dataclasses.asdict(complete_request)),
     )
     krequests.raise_for_status(complete_res)
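
The deleted upload_image_chips moves to kolena.fr._utils (commit 3605ad4) since it is FR-specific. Its batching pattern is worth noting on its own: np.array_split tolerates uneven division, so the final chunk is simply smaller. A standalone sketch of that pattern, with a hypothetical process_batch callback standing in for the multipart upload:

import math
from typing import Callable

import numpy as np
import pandas as pd

def in_batches(df: pd.DataFrame, batch_size: int, process_batch: Callable[[pd.DataFrame], None]) -> None:
    num_chunks = math.ceil(len(df) / batch_size)
    chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else []
    for df_chunk in chunk_iter:
        # reset indices so positional lookups within the batch line up
        process_batch(df_chunk.reset_index(drop=True))
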
4 changes: 2 additions & 2 deletions kolena/_utils/_consts.py → kolena/_utils/consts.py
@@ -14,8 +14,8 @@
 from enum import Enum
 
 
-class _BatchSize(int, Enum):
-    UPLOAD_CHIPS = 5_000
+class BatchSize(int, Enum):
+    UPLOAD_CHIPS = 1_000
     UPLOAD_RECORDS = 10_000_000
     UPLOAD_RESULTS = 1_000_000
 
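
Alongside the rename from _BatchSize to BatchSize, UPLOAD_CHIPS drops from 5_000 to 1_000. The Python 3.11 caveat applies to int-mixed enums as well: comparisons and arithmetic still behave like int, but f-strings now show the member name, which is why call sites in this PR pass .value explicitly. A quick illustration:

from enum import Enum

class BatchSize(int, Enum):
    UPLOAD_CHIPS = 1_000

assert BatchSize.UPLOAD_CHIPS == 1_000    # int semantics are unchanged
print(f"{BatchSize.UPLOAD_CHIPS}")        # 3.10: "1000"; 3.11: "BatchSize.UPLOAD_CHIPS"
print(f"{BatchSize.UPLOAD_CHIPS.value}")  # "1000" on every version
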
4 changes: 2 additions & 2 deletions kolena/_utils/instrumentation.py
@@ -54,15 +54,15 @@ def upload_log(message: str, status: str) -> None:
         message=message,
         status=status,
     )
-    krequests.post(endpoint_path=API.Path.UPLOAD, json=dataclasses.asdict(request))
+    krequests.post(endpoint_path=API.Path.UPLOAD.value, json=dataclasses.asdict(request))
 
 
 def log_telemetry(e: BaseException) -> None:
     try:
         stack = tb.format_stack()
         exc_format = tb.format_exception(None, e, e.__traceback__)
         combined = stack + exc_format
-        upload_log("".join(combined), DatadogLogLevels.ERROR)
+        upload_log("".join(combined), DatadogLogLevels.ERROR.value)
     except BaseException:
         """
         Attempting to upload the telemetry is best-effort. We don't want to have exceptions in that
2 changes: 1 addition & 1 deletion kolena/_utils/repository.py
@@ -21,7 +21,7 @@
 
 def create(repository: str) -> None:
     response = krequests.post(
-        endpoint_path=Path.CREATE,
+        endpoint_path=Path.CREATE.value,
         data=json.dumps(dataclasses.asdict(CreateRepositoryRequest(repository=repository))),
     )
     krequests.raise_for_status(response)
14 changes: 7 additions & 7 deletions kolena/detection/_internal/model.py
@@ -33,9 +33,9 @@
 from kolena._api.v1.workflow import WorkflowType
 from kolena._utils import krequests
 from kolena._utils import log
-from kolena._utils._consts import _BatchSize
 from kolena._utils.batched_load import _BatchedLoader
 from kolena._utils.batched_load import DFType
+from kolena._utils.consts import BatchSize
 from kolena._utils.frozen import Frozen
 from kolena._utils.instrumentation import WithTelemetry
 from kolena._utils.serde import from_dict
@@ -93,7 +93,7 @@ def __init__(self, name: str, workflow: WorkflowType, metadata: Optional[Dict[st
     def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) -> CoreAPI.EntityData:
         log.info(f"creating new model '{name}'")
         request = CoreAPI.CreateRequest(name=name, metadata=metadata, workflow=workflow.value)
-        res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         log.success(f"created new model '{name}'")
         return from_dict(data_class=CoreAPI.EntityData, data=res.json())
@@ -102,7 +102,7 @@ def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) ->
     @validate_arguments(config=ValidatorConfig)
     def _load_by_name(cls, name: str) -> CoreAPI.EntityData:
         request = CoreAPI.LoadByNameRequest(name=name)
-        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         return from_dict(data_class=CoreAPI.EntityData, data=res.json())
 
@@ -131,7 +131,7 @@ def iter_inferences(
     def _iter_inference_batch_for_reference(
         self,
         test_object: Union[_TestCaseClass, _TestSuiteClass],
-        batch_size: int = _BatchSize.LOAD_SAMPLES,
+        batch_size: int = BatchSize.LOAD_SAMPLES.value,
     ) -> Iterator[_LoadInferencesDataFrameClass]:
         if batch_size <= 0:
             raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
@@ -143,7 +143,7 @@ def _iter_inference_batch_for_reference(
         init_request = API.InitLoadInferencesRequest(**params)
         yield from _BatchedLoader.iter_data(
             init_request=init_request,
-            endpoint_path=API.Path.INIT_LOAD_INFERENCES,
+            endpoint_path=API.Path.INIT_LOAD_INFERENCES.value,
             df_class=self._LoadInferencesDataFrameClass,
         )
         log.success(f"loaded inferences from model '{self.name}' on {test_object_display_name}")
@@ -166,7 +166,7 @@ def load_inferences_by_test_case(
     def _iter_inference_batch_for_test_suite(
         self,
         test_suite: _TestSuiteClass,
-        batch_size: int = _BatchSize.LOAD_SAMPLES,
+        batch_size: int = BatchSize.LOAD_SAMPLES.value,
     ) -> Iterator[_LoadInferencesDataFrameClass]:
         if batch_size <= 0:
             raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
@@ -175,7 +175,7 @@ def _iter_inference_batch_for_test_suite(
         init_request = API.InitLoadInferencesByTestCaseRequest(**params)
         yield from _BatchedLoader.iter_data(
             init_request=init_request,
-            endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE,
+            endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE.value,
             df_class=self._LoadInferencesDataFrameClass,
         )
         log.success(f"loaded inferences from model '{self.name}' on test suite '{test_suite.name}'")
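
Using BatchSize.LOAD_SAMPLES.value rather than the member itself as the default also keeps the f-string in the batch-size validation readable on 3.11. A sketch of the pitfall, assuming a hypothetical LOAD_SAMPLES value since that member is not shown in this diff:

from enum import Enum

class BatchSize(int, Enum):
    LOAD_SAMPLES = 10_000_000  # hypothetical value

def load(batch_size: int = BatchSize.LOAD_SAMPLES) -> None:
    print(f"loading in batches of {batch_size}")

load()  # 3.10: "loading in batches of 10000000"
        # 3.11: "loading in batches of BatchSize.LOAD_SAMPLES"
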
14 changes: 7 additions & 7 deletions kolena/detection/_internal/test_case.py
@@ -30,11 +30,11 @@
 from kolena._api.v1.workflow import WorkflowType
 from kolena._utils import krequests
 from kolena._utils import log
-from kolena._utils._consts import _BatchSize
 from kolena._utils.batched_load import _BatchedLoader
 from kolena._utils.batched_load import DFType
 from kolena._utils.batched_load import init_upload
 from kolena._utils.batched_load import upload_data_frame
+from kolena._utils.consts import BatchSize
 from kolena._utils.dataframes.validators import validate_df_schema
 from kolena._utils.frozen import Frozen
 from kolena._utils.instrumentation import WithTelemetry
@@ -128,7 +128,7 @@ def _create(
         """Create a new test case with the provided name."""
         log.info(f"creating new test case '{name}'")
         request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=workflow.value)
-        res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         data = from_dict(data_class=CoreAPI.EntityData, data=res.json())
         obj = cls._create_from_data(data)
@@ -142,7 +142,7 @@ def _create(
     def _load_by_name(cls, name: str, version: Optional[int] = None) -> CoreAPI.EntityData:
         """Load an existing test case with the provided name."""
         request = CoreAPI.LoadByNameRequest(name=name, version=version)
-        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         return from_dict(data_class=CoreAPI.EntityData, data=res.json())
 
@@ -173,10 +173,10 @@ def load_images(self) -> List[_TestImageClass]:
     def iter_images(self) -> Iterator[_TestImageClass]:
         """Iterate through all images with their associated ground truths in this test case."""
         log.info(f"loading test images for test case '{self.name}'")
-        init_request = CoreAPI.InitLoadContentsRequest(batch_size=_BatchSize.LOAD_SAMPLES, test_case_id=self._id)
+        init_request = CoreAPI.InitLoadContentsRequest(batch_size=BatchSize.LOAD_SAMPLES.value, test_case_id=self._id)
         for df in _BatchedLoader.iter_data(
             init_request=init_request,
-            endpoint_path=API.Path.INIT_LOAD_IMAGES,
+            endpoint_path=API.Path.INIT_LOAD_IMAGES.value,
             df_class=self._TestImageDataFrameClass,
         ):
             for record in df.itertuples():
@@ -312,7 +312,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
         init_response = init_upload()
         df = self._to_data_frame(list(editor._images.values()))
         df_serialized = df.as_serializable()
-        upload_data_frame(df=df_serialized, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid)
+        upload_data_frame(df=df_serialized, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid)
 
         request = CoreAPI.CompleteEditRequest(
             test_case_id=self._id,
@@ -322,7 +322,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
             uuid=init_response.uuid,
         )
         complete_res = krequests.put(
-            endpoint_path=API.Path.COMPLETE_EDIT,
+            endpoint_path=API.Path.COMPLETE_EDIT.value,
             data=json.dumps(dataclasses.asdict(request)),
         )
         krequests.raise_for_status(complete_res)