From 591b9eedab5a3624714d0ff730a2f7f387b9c31f Mon Sep 17 00:00:00 2001 From: mertalev <101130780+mertalev@users.noreply.github.com> Date: Sat, 22 Jun 2024 15:51:43 -0400 Subject: [PATCH 1/4] refactor model sessions --- machine-learning/app/models/base.py | 135 +++--------------- .../models/facial_recognition/recognition.py | 30 ++-- machine-learning/app/models/session.py | 0 machine-learning/app/schemas.py | 12 ++ machine-learning/app/sessions/__init__.py | 5 + .../app/{models => sessions}/ann.py | 5 +- machine-learning/app/sessions/ort.py | 129 +++++++++++++++++ machine-learning/app/test_main.py | 6 +- 8 files changed, 191 insertions(+), 131 deletions(-) delete mode 100644 machine-learning/app/models/session.py create mode 100644 machine-learning/app/sessions/__init__.py rename machine-learning/app/{models => sessions}/ann.py (94%) create mode 100644 machine-learning/app/sessions/ort.py diff --git a/machine-learning/app/models/base.py b/machine-learning/app/models/base.py index 4ad6fd6eb7049..cad8239766468 100644 --- a/machine-learning/app/models/base.py +++ b/machine-learning/app/models/base.py @@ -5,15 +5,14 @@ from shutil import rmtree from typing import Any, ClassVar -import onnxruntime as ort from huggingface_hub import snapshot_download import ann.ann -from app.models.constants import SUPPORTED_PROVIDERS +from app.sessions.ort import OrtSession from ..config import clean_name, log, settings from ..schemas import ModelFormat, ModelIdentity, ModelSession, ModelTask, ModelType -from .ann import AnnSession +from ..sessions.ann import AnnSession class InferenceModel(ABC): @@ -24,20 +23,16 @@ def __init__( self, model_name: str, cache_dir: Path | str | None = None, - providers: list[str] | None = None, - provider_options: list[dict[str, Any]] | None = None, - sess_options: ort.SessionOptions | None = None, preferred_format: ModelFormat | None = None, + session: ModelSession | None = None, **model_kwargs: Any, ) -> None: - self.loaded = False + self.loaded = session is not None self.load_attempts = 0 self.model_name = clean_name(model_name) self.cache_dir = Path(cache_dir) if cache_dir is not None else self.cache_dir_default - self.providers = providers if providers is not None else self.providers_default - self.provider_options = provider_options if provider_options is not None else self.provider_options_default - self.sess_options = sess_options if sess_options is not None else self.sess_options_default - self.preferred_format = preferred_format if preferred_format is not None else self.preferred_format_default + self.model_format = preferred_format if preferred_format is not None else self._model_format_default + self.session = session def download(self) -> None: if not self.cached: @@ -70,7 +65,7 @@ def configure(self, **kwargs: Any) -> None: pass def _download(self) -> None: - ignore_patterns = [] if self.preferred_format == ModelFormat.ARMNN else ["*.armnn"] + ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"] snapshot_download( f"immich-app/{clean_name(self.model_name)}", cache_dir=self.cache_dir, @@ -105,26 +100,11 @@ def clear_cache(self) -> None: self.cache_dir.mkdir(parents=True, exist_ok=True) def _make_session(self, model_path: Path) -> ModelSession: - if not model_path.is_file(): - onnx_path = model_path.with_suffix(".onnx") - if not onnx_path.is_file(): - raise ValueError(f"Model path '{model_path}' does not exist") - - log.warning( - f"Could not find model path '{model_path}'. " f"Falling back to ONNX model path '{onnx_path}' instead.", - ) - model_path = onnx_path - match model_path.suffix: case ".armnn": - session = AnnSession(model_path) + session: ModelSession = AnnSession(model_path) case ".onnx": - session = ort.InferenceSession( - model_path.as_posix(), - sess_options=self.sess_options, - providers=self.providers, - provider_options=self.provider_options, - ) + session = OrtSession(model_path) case _: raise ValueError(f"Unsupported model file type: {model_path.suffix}") return session @@ -135,7 +115,7 @@ def model_dir(self) -> Path: @property def model_path(self) -> Path: - return self.model_dir / f"model.{self.preferred_format}" + return self.model_dir / f"model.{self.model_format}" @property def model_task(self) -> ModelTask: @@ -162,95 +142,18 @@ def cached(self) -> bool: return self.model_path.is_file() @property - def providers(self) -> list[str]: - return self._providers - - @providers.setter - def providers(self, providers: list[str]) -> None: - log.info( - (f"Setting '{self.model_name}' execution providers to {providers}, " "in descending order of preference"), - ) - self._providers = providers - - @property - def providers_default(self) -> list[str]: - available_providers = set(ort.get_available_providers()) - log.debug(f"Available ORT providers: {available_providers}") - if (openvino := "OpenVINOExecutionProvider") in available_providers: - device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids() - log.debug(f"Available OpenVINO devices: {device_ids}") - - gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")] - if not gpu_devices: - log.warning("No GPU device found in OpenVINO. Falling back to CPU.") - available_providers.remove(openvino) - return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers] - - @property - def provider_options(self) -> list[dict[str, Any]]: - return self._provider_options - - @provider_options.setter - def provider_options(self, provider_options: list[dict[str, Any]]) -> None: - log.debug(f"Setting execution provider options to {provider_options}") - self._provider_options = provider_options - - @property - def provider_options_default(self) -> list[dict[str, Any]]: - options = [] - for provider in self.providers: - match provider: - case "CPUExecutionProvider" | "CUDAExecutionProvider": - option = {"arena_extend_strategy": "kSameAsRequested"} - case "OpenVINOExecutionProvider": - option = {"device_type": "GPU_FP32", "cache_dir": (self.cache_dir / "openvino").as_posix()} - case _: - option = {} - options.append(option) - return options - - @property - def sess_options(self) -> ort.SessionOptions: - return self._sess_options - - @sess_options.setter - def sess_options(self, sess_options: ort.SessionOptions) -> None: - log.debug(f"Setting execution_mode to {sess_options.execution_mode.name}") - log.debug(f"Setting inter_op_num_threads to {sess_options.inter_op_num_threads}") - log.debug(f"Setting intra_op_num_threads to {sess_options.intra_op_num_threads}") - self._sess_options = sess_options - - @property - def sess_options_default(self) -> ort.SessionOptions: - sess_options = ort.SessionOptions() - sess_options.enable_cpu_mem_arena = False - - # avoid thread contention between models - if settings.model_inter_op_threads > 0: - sess_options.inter_op_num_threads = settings.model_inter_op_threads - # these defaults work well for CPU, but bottleneck GPU - elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: - sess_options.inter_op_num_threads = 1 - - if settings.model_intra_op_threads > 0: - sess_options.intra_op_num_threads = settings.model_intra_op_threads - elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: - sess_options.intra_op_num_threads = 2 - - if sess_options.inter_op_num_threads > 1: - sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL - - return sess_options - - @property - def preferred_format(self) -> ModelFormat: + def model_format(self) -> ModelFormat: return self._preferred_format - @preferred_format.setter - def preferred_format(self, preferred_format: ModelFormat) -> None: + @model_format.setter + def model_format(self, preferred_format: ModelFormat) -> None: log.debug(f"Setting preferred format to {preferred_format}") self._preferred_format = preferred_format @property - def preferred_format_default(self) -> ModelFormat: - return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX + def _model_format_default(self) -> ModelFormat: + prefer_ann = ann.ann.is_available and settings.ann + ann_exists = (self.model_dir / "model.armnn").is_file() + if prefer_ann and not ann_exists: + log.warning(f"ARM NN is available, but '{self.model_name}' does not support ARM NN. Falling back to ONNX.") + return ModelFormat.ARMNN if prefer_ann and ann_exists else ModelFormat.ONNX diff --git a/machine-learning/app/models/facial_recognition/recognition.py b/machine-learning/app/models/facial_recognition/recognition.py index cb8093dd95e9e..d0c08c9365632 100644 --- a/machine-learning/app/models/facial_recognition/recognition.py +++ b/machine-learning/app/models/facial_recognition/recognition.py @@ -3,7 +3,6 @@ import numpy as np import onnx -import onnxruntime as ort from insightface.model_zoo import ArcFaceONNX from insightface.utils.face_align import norm_crop from numpy.typing import NDArray @@ -13,7 +12,8 @@ from app.config import clean_name, log from app.models.base import InferenceModel from app.models.transforms import decode_cv2 -from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelSession, ModelTask, ModelType +from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelFormat, ModelSession, ModelTask, ModelType +from app.sessions import has_batch_axis class FaceRecognizer(InferenceModel): @@ -28,13 +28,15 @@ def __init__( **model_kwargs: Any, ) -> None: self.min_score = model_kwargs.pop("minScore", min_score) + self.batch = False super().__init__(clean_name(model_name), cache_dir, **model_kwargs) def _load(self) -> ModelSession: session = self._make_session(self.model_path) - if not self._has_batch_dim(session): - self._add_batch_dim(self.model_path) + if self.model_format == ModelFormat.ONNX and not has_batch_axis(session): + self._add_batch_axis(self.model_path) session = self._make_session(self.model_path) + self.batch = True self.model = ArcFaceONNX( self.model_path.with_suffix(".onnx").as_posix(), session=session, @@ -47,9 +49,20 @@ def _predict( if faces["boxes"].shape[0] == 0: return [] inputs = decode_cv2(inputs) - embeddings: NDArray[np.float32] = self.model.get_feat(self._crop(inputs, faces)) + cropped_faces = self._crop(inputs, faces) + embeddings = self._predict_batch(cropped_faces) if self.batch else self._predict_single(cropped_faces) return self.postprocess(faces, embeddings) + def _predict_batch(self, cropped_faces: list[NDArray[np.uint8]]) -> NDArray[np.float32]: + embeddings: NDArray[np.float32] = self.model.get_feat(cropped_faces) + return embeddings + + def _predict_single(self, cropped_faces: list[NDArray[np.uint8]]) -> NDArray[np.float32]: + embeddings: list[NDArray[np.float32]] = [] + for face in cropped_faces: + embeddings.append(self.model.get_feat(face)) + return np.concatenate(embeddings, axis=0) + def postprocess(self, faces: FaceDetectionOutput, embeddings: NDArray[np.float32]) -> FacialRecognitionOutput: return [ { @@ -63,11 +76,8 @@ def postprocess(self, faces: FaceDetectionOutput, embeddings: NDArray[np.float32 def _crop(self, image: NDArray[np.uint8], faces: FaceDetectionOutput) -> list[NDArray[np.uint8]]: return [norm_crop(image, landmark) for landmark in faces["landmarks"]] - def _has_batch_dim(self, session: ort.InferenceSession) -> bool: - return not isinstance(session, ort.InferenceSession) or session.get_inputs()[0].shape[0] == "batch" - - def _add_batch_dim(self, model_path: Path) -> None: - log.debug(f"Adding batch dimension to model {model_path}") + def _add_batch_axis(self, model_path: Path) -> None: + log.debug(f"Adding batch axis to model {model_path}") proto = onnx.load(model_path) static_input_dims = [shape.dim_value for shape in proto.graph.input[0].type.tensor_type.shape.dim[1:]] static_output_dims = [shape.dim_value for shape in proto.graph.output[0].type.tensor_type.shape.dim[1:]] diff --git a/machine-learning/app/models/session.py b/machine-learning/app/models/session.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/machine-learning/app/schemas.py b/machine-learning/app/schemas.py index 739098a6bfadd..f051db12c3d4d 100644 --- a/machine-learning/app/schemas.py +++ b/machine-learning/app/schemas.py @@ -54,6 +54,14 @@ class ModelSource(StrEnum): ModelIdentity = tuple[ModelType, ModelTask] +class SessionNode(Protocol): + @property + def name(self) -> str | None: ... + + @property + def shape(self) -> tuple[int, ...]: ... + + class ModelSession(Protocol): def run( self, @@ -62,6 +70,10 @@ def run( run_options: Any = None, ) -> list[npt.NDArray[np.float32]]: ... + def get_inputs(self) -> list[SessionNode]: ... + + def get_outputs(self) -> list[SessionNode]: ... + class HasProfiling(Protocol): profiling: dict[str, float] diff --git a/machine-learning/app/sessions/__init__.py b/machine-learning/app/sessions/__init__.py new file mode 100644 index 0000000000000..e0c00ea4a0472 --- /dev/null +++ b/machine-learning/app/sessions/__init__.py @@ -0,0 +1,5 @@ +from app.schemas import ModelSession + + +def has_batch_axis(session: ModelSession) -> bool: + return not isinstance(session.get_inputs()[0].shape[0], int) or session.get_inputs()[0].shape[0] < 0 diff --git a/machine-learning/app/models/ann.py b/machine-learning/app/sessions/ann.py similarity index 94% rename from machine-learning/app/models/ann.py rename to machine-learning/app/sessions/ann.py index 6bd3065ab918d..57da57823a8e6 100644 --- a/machine-learning/app/models/ann.py +++ b/machine-learning/app/sessions/ann.py @@ -7,6 +7,7 @@ from numpy.typing import NDArray from ann.ann import Ann +from app.schemas import SessionNode from ..config import log, settings @@ -45,11 +46,11 @@ def __del__(self) -> None: log.info("Unloaded ANN model %d", self.model) self.ann.destroy() - def get_inputs(self) -> list[AnnNode]: + def get_inputs(self) -> list[SessionNode]: shapes = self.ann.input_shapes[self.model] return [AnnNode(None, s) for s in shapes] - def get_outputs(self) -> list[AnnNode]: + def get_outputs(self) -> list[SessionNode]: shapes = self.ann.output_shapes[self.model] return [AnnNode(None, s) for s in shapes] diff --git a/machine-learning/app/sessions/ort.py b/machine-learning/app/sessions/ort.py new file mode 100644 index 0000000000000..e523388def169 --- /dev/null +++ b/machine-learning/app/sessions/ort.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import numpy as np +import onnxruntime as ort +from numpy.typing import NDArray + +from app.models.constants import SUPPORTED_PROVIDERS +from app.schemas import SessionNode + +from ..config import log, settings + + +class OrtSession: + def __init__( + self, + model_path: Path, + providers: list[str] | None = None, + provider_options: list[dict[str, Any]] | None = None, + sess_options: ort.SessionOptions | None = None, + ): + self.model_path = model_path + self.providers = providers if providers is not None else self._providers_default + self.provider_options = provider_options if provider_options is not None else self._provider_options_default + self.sess_options = sess_options if sess_options is not None else self._sess_options_default + self.session = ort.InferenceSession( + model_path.as_posix(), + providers=self.providers, + provider_options=self.provider_options, + sess_options=self.sess_options, + ) + + def get_inputs(self) -> list[SessionNode]: + inputs: list[SessionNode] = self.session.get_inputs() + return inputs + + def get_outputs(self) -> list[SessionNode]: + outputs: list[SessionNode] = self.session.get_outputs() + return outputs + + def run( + self, + output_names: list[str] | None, + input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]], + run_options: Any = None, + ) -> list[NDArray[np.float32]]: + outputs: list[NDArray[np.float32]] = self.session.run(output_names, input_feed, run_options) + return outputs + + @property + def providers(self) -> list[str]: + return self._providers + + @providers.setter + def providers(self, providers: list[str]) -> None: + log.info(f"Setting execution providers to {providers}, in descending order of preference") + self._providers = providers + + @property + def _providers_default(self) -> list[str]: + available_providers = set(ort.get_available_providers()) + log.debug(f"Available ORT providers: {available_providers}") + if (openvino := "OpenVINOExecutionProvider") in available_providers: + device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids() + log.debug(f"Available OpenVINO devices: {device_ids}") + + gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")] + if not gpu_devices: + log.warning("No GPU device found in OpenVINO. Falling back to CPU.") + available_providers.remove(openvino) + return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers] + + @property + def provider_options(self) -> list[dict[str, Any]]: + return self._provider_options + + @provider_options.setter + def provider_options(self, provider_options: list[dict[str, Any]]) -> None: + log.debug(f"Setting execution provider options to {provider_options}") + self._provider_options = provider_options + + @property + def _provider_options_default(self) -> list[dict[str, Any]]: + options = [] + for provider in self.providers: + match provider: + case "CPUExecutionProvider" | "CUDAExecutionProvider": + option = {"arena_extend_strategy": "kSameAsRequested"} + case "OpenVINOExecutionProvider": + option = {"device_type": "GPU_FP32", "cache_dir": (self.model_path.parent / "openvino").as_posix()} + case _: + option = {} + options.append(option) + return options + + @property + def sess_options(self) -> ort.SessionOptions: + return self._sess_options + + @sess_options.setter + def sess_options(self, sess_options: ort.SessionOptions) -> None: + log.debug(f"Setting execution_mode to {sess_options.execution_mode.name}") + log.debug(f"Setting inter_op_num_threads to {sess_options.inter_op_num_threads}") + log.debug(f"Setting intra_op_num_threads to {sess_options.intra_op_num_threads}") + self._sess_options = sess_options + + @property + def _sess_options_default(self) -> ort.SessionOptions: + sess_options = ort.SessionOptions() + sess_options.enable_cpu_mem_arena = False + + # avoid thread contention between models + if settings.model_inter_op_threads > 0: + sess_options.inter_op_num_threads = settings.model_inter_op_threads + # these defaults work well for CPU, but bottleneck GPU + elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: + sess_options.inter_op_num_threads = 1 + + if settings.model_intra_op_threads > 0: + sess_options.intra_op_num_threads = settings.model_intra_op_threads + elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: + sess_options.intra_op_num_threads = 2 + + if sess_options.inter_op_num_threads > 1: + sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL + + return sess_options diff --git a/machine-learning/app/test_main.py b/machine-learning/app/test_main.py index 2068c7a4c6856..89111c2f60c9d 100644 --- a/machine-learning/app/test_main.py +++ b/machine-learning/app/test_main.py @@ -162,7 +162,7 @@ def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: encoder = OpenClipTextualEncoder("ViT-B-32__openai") - assert encoder.preferred_format == ModelFormat.ONNX + assert encoder.model_format == ModelFormat.ONNX def test_sets_default_preferred_format_to_armnn_if_available(self, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) @@ -170,7 +170,7 @@ def test_sets_default_preferred_format_to_armnn_if_available(self, mocker: Mocke encoder = OpenClipTextualEncoder("ViT-B-32__openai") - assert encoder.preferred_format == ModelFormat.ARMNN + assert encoder.model_format == ModelFormat.ARMNN def test_sets_preferred_format_kwarg(self, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", False) @@ -178,7 +178,7 @@ def test_sets_preferred_format_kwarg(self, mocker: MockerFixture) -> None: encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN) - assert encoder.preferred_format == ModelFormat.ARMNN + assert encoder.model_format == ModelFormat.ARMNN def test_casts_cache_dir_string_to_path(self) -> None: cache_dir = "/test_cache" From 17ef8a80896a9b49d808258361d8a71f6531d0f4 Mon Sep 17 00:00:00 2001 From: mertalev <101130780+mertalev@users.noreply.github.com> Date: Sat, 22 Jun 2024 16:34:09 -0400 Subject: [PATCH 2/4] update tests --- machine-learning/app/conftest.py | 62 ++- machine-learning/app/models/base.py | 4 +- .../models/facial_recognition/recognition.py | 5 +- machine-learning/app/sessions/ort.py | 6 +- machine-learning/app/test_main.py | 353 +++++++----------- machine-learning/pyproject.toml | 2 +- 6 files changed, 207 insertions(+), 225 deletions(-) diff --git a/machine-learning/app/conftest.py b/machine-learning/app/conftest.py index efd5da5f38a5f..f5e3f95cc858c 100644 --- a/machine-learning/app/conftest.py +++ b/machine-learning/app/conftest.py @@ -9,6 +9,7 @@ from PIL import Image from .main import app +from app.config import log @pytest.fixture @@ -96,12 +97,69 @@ def clip_tokenizer_cfg() -> dict[str, Any]: @pytest.fixture(scope="function") -def providers(request: pytest.FixtureRequest) -> Iterator[dict[str, Any]]: +def providers(request: pytest.FixtureRequest) -> Iterator[mock.Mock]: marker = request.node.get_closest_marker("providers") if marker is None: raise ValueError("Missing marker 'providers'") providers = marker.args[0] - with mock.patch("app.models.base.ort.get_available_providers") as mocked: + with mock.patch("app.sessions.ort.ort.get_available_providers") as mocked: mocked.return_value = providers yield providers + + +@pytest.fixture(scope="function") +def ort_pybind() -> Iterator[mock.Mock]: + with mock.patch("app.sessions.ort.ort.capi._pybind_state") as mocked: + yield mocked + + +@pytest.fixture(scope="function") +def ov_device_ids(request: pytest.FixtureRequest, ort_pybind: mock.Mock) -> Iterator[mock.Mock]: + marker = request.node.get_closest_marker("ov_device_ids") + if marker is None: + raise ValueError("Missing marker 'ov_device_ids'") + ort_pybind.get_available_openvino_device_ids.return_value = marker.args[0] + return ort_pybind + + +@pytest.fixture(scope="function") +def ort_session() -> Iterator[mock.Mock]: + with mock.patch("app.sessions.ort.ort.InferenceSession") as mocked: + yield mocked + + +@pytest.fixture(scope="function") +def rmtree() -> Iterator[mock.Mock]: + with mock.patch("app.models.base.rmtree", autospec=True) as mocked: + mocked.avoids_symlink_attacks = True + yield mocked + + +@pytest.fixture(scope="function") +def cache_dir() -> Iterator[mock.Mock]: + mock_cache_dir = mock.MagicMock() + mock_cache_dir.exists.return_value = True + mock_cache_dir.is_dir.return_value = True + mock_cache_dir.is_file.return_value = True + + with mock.patch("app.models.base.Path", return_value=mock_cache_dir) as mocked: + yield mocked + + +@pytest.fixture(scope="function") +def info() -> Iterator[mock.Mock]: + with mock.patch.object(log, "info") as mocked: + yield mocked + + +@pytest.fixture(scope="function") +def warning() -> Iterator[mock.Mock]: + with mock.patch.object(log, "warning") as mocked: + yield mocked + + +@pytest.fixture(scope="function") +def snapshot_download() -> Iterator[mock.Mock]: + with mock.patch("app.models.base.snapshot_download") as mocked: + yield mocked diff --git a/machine-learning/app/models/base.py b/machine-learning/app/models/base.py index cad8239766468..a4e2b13c72599 100644 --- a/machine-learning/app/models/base.py +++ b/machine-learning/app/models/base.py @@ -30,7 +30,7 @@ def __init__( self.loaded = session is not None self.load_attempts = 0 self.model_name = clean_name(model_name) - self.cache_dir = Path(cache_dir) if cache_dir is not None else self.cache_dir_default + self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default self.model_format = preferred_format if preferred_format is not None else self._model_format_default self.session = session @@ -134,7 +134,7 @@ def cache_dir(self, cache_dir: Path) -> None: self._cache_dir = cache_dir @property - def cache_dir_default(self) -> Path: + def _cache_dir_default(self) -> Path: return settings.cache_folder / self.model_task.value / self.model_name @property diff --git a/machine-learning/app/models/facial_recognition/recognition.py b/machine-learning/app/models/facial_recognition/recognition.py index d0c08c9365632..24ce816385c25 100644 --- a/machine-learning/app/models/facial_recognition/recognition.py +++ b/machine-learning/app/models/facial_recognition/recognition.py @@ -27,16 +27,15 @@ def __init__( cache_dir: Path | str | None = None, **model_kwargs: Any, ) -> None: - self.min_score = model_kwargs.pop("minScore", min_score) - self.batch = False super().__init__(clean_name(model_name), cache_dir, **model_kwargs) + self.min_score = model_kwargs.pop("minScore", min_score) + self.batch = self.model_format == ModelFormat.ONNX def _load(self) -> ModelSession: session = self._make_session(self.model_path) if self.model_format == ModelFormat.ONNX and not has_batch_axis(session): self._add_batch_axis(self.model_path) session = self._make_session(self.model_path) - self.batch = True self.model = ArcFaceONNX( self.model_path.with_suffix(".onnx").as_posix(), session=session, diff --git a/machine-learning/app/sessions/ort.py b/machine-learning/app/sessions/ort.py index e523388def169..dfa2f8417ca6d 100644 --- a/machine-learning/app/sessions/ort.py +++ b/machine-learning/app/sessions/ort.py @@ -16,17 +16,17 @@ class OrtSession: def __init__( self, - model_path: Path, + model_path: Path | str, providers: list[str] | None = None, provider_options: list[dict[str, Any]] | None = None, sess_options: ort.SessionOptions | None = None, ): - self.model_path = model_path + self.model_path = Path(model_path) self.providers = providers if providers is not None else self._providers_default self.provider_options = provider_options if provider_options is not None else self._provider_options_default self.sess_options = sess_options if sess_options is not None else self._sess_options_default self.session = ort.InferenceSession( - model_path.as_posix(), + self.model_path.as_posix(), providers=self.providers, provider_options=self.provider_options, sess_options=self.sess_options, diff --git a/machine-learning/app/test_main.py b/machine-learning/app/test_main.py index 89111c2f60c9d..eff2f4c40f883 100644 --- a/machine-learning/app/test_main.py +++ b/machine-learning/app/test_main.py @@ -22,129 +22,15 @@ from app.models.clip.visual import OpenClipVisualEncoder from app.models.facial_recognition.detection import FaceDetector from app.models.facial_recognition.recognition import FaceRecognizer +from app.sessions.ort import OrtSession -from .config import Settings, log, settings +from .config import Settings, settings from .models.base import InferenceModel from .models.cache import ModelCache from .schemas import ModelFormat, ModelTask, ModelType class TestBase: - CPU_EP = ["CPUExecutionProvider"] - CUDA_EP = ["CUDAExecutionProvider", "CPUExecutionProvider"] - OV_EP = ["OpenVINOExecutionProvider", "CPUExecutionProvider"] - CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"] - TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"] - - @pytest.mark.providers(CPU_EP) - def test_sets_cpu_provider(self, providers: list[str]) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.providers == self.CPU_EP - - @pytest.mark.providers(CUDA_EP) - def test_sets_cuda_provider_if_available(self, providers: list[str]) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.providers == self.CUDA_EP - - @pytest.mark.providers(OV_EP) - def test_sets_openvino_provider_if_available(self, providers: list[str], mocker: MockerFixture) -> None: - mocked = mocker.patch("app.models.base.ort.capi._pybind_state") - mocked.get_available_openvino_device_ids.return_value = ["GPU.0", "CPU"] - - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.providers == self.OV_EP - - @pytest.mark.providers(OV_EP) - def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], mocker: MockerFixture) -> None: - mocked = mocker.patch("app.models.base.ort.capi._pybind_state") - mocked.get_available_openvino_device_ids.return_value = ["CPU"] - - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.providers == self.CPU_EP - - @pytest.mark.providers(CUDA_EP_OUT_OF_ORDER) - def test_sets_providers_in_correct_order(self, providers: list[str]) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.providers == self.CUDA_EP - - @pytest.mark.providers(TRT_EP) - def test_ignores_unsupported_providers(self, providers: list[str]) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.providers == self.CUDA_EP - - def test_sets_provider_kwarg(self) -> None: - providers = ["CUDAExecutionProvider"] - encoder = OpenClipTextualEncoder("ViT-B-32__openai", providers=providers) - - assert encoder.providers == providers - - def test_sets_default_provider_options(self, mocker: MockerFixture) -> None: - mocked = mocker.patch("app.models.base.ort.capi._pybind_state") - mocked.get_available_openvino_device_ids.return_value = ["GPU.0", "CPU"] - - encoder = OpenClipTextualEncoder( - "ViT-B-32__openai", providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"] - ) - - assert encoder.provider_options == [ - {"device_type": "GPU_FP32", "cache_dir": (encoder.cache_dir / "openvino").as_posix()}, - {"arena_extend_strategy": "kSameAsRequested"}, - ] - - def test_sets_provider_options_kwarg(self) -> None: - encoder = OpenClipTextualEncoder( - "ViT-B-32__openai", - providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"], - provider_options=[], - ) - - assert encoder.provider_options == [] - - def test_sets_default_sess_options(self) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - - assert encoder.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL - assert encoder.sess_options.inter_op_num_threads == 1 - assert encoder.sess_options.intra_op_num_threads == 2 - assert encoder.sess_options.enable_cpu_mem_arena is False - - def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None: - encoder = OpenClipTextualEncoder( - "ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"] - ) - - assert encoder.sess_options.inter_op_num_threads == 0 - assert encoder.sess_options.intra_op_num_threads == 0 - - def test_sets_default_sess_options_sets_threads_if_non_cpu_and_set_threads(self, mocker: MockerFixture) -> None: - mock_settings = mocker.patch("app.models.base.settings", autospec=True) - mock_settings.model_inter_op_threads = 2 - mock_settings.model_intra_op_threads = 4 - - encoder = OpenClipTextualEncoder( - "ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"] - ) - - assert encoder.sess_options.inter_op_num_threads == 2 - assert encoder.sess_options.intra_op_num_threads == 4 - - def test_sets_sess_options_kwarg(self) -> None: - sess_options = ort.SessionOptions() - encoder = OpenClipTextualEncoder( - "ViT-B-32__openai", - providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"], - provider_options=[], - sess_options=sess_options, - ) - - assert sess_options is encoder.sess_options - def test_sets_default_cache_dir(self) -> None: encoder = OpenClipTextualEncoder("ViT-B-32__openai") @@ -164,11 +50,16 @@ def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: assert encoder.model_format == ModelFormat.ONNX - def test_sets_default_preferred_format_to_armnn_if_available(self, mocker: MockerFixture) -> None: + def test_sets_default_preferred_format_to_armnn_if_available(self, cache_dir, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) mocker.patch("ann.ann.is_available", True) + mock_model_path = mocker.MagicMock() + mock_model_path.is_file.return_value = True + mock_model_path.suffix = ".armnn" + mock_model_path.with_suffix.return_value = mock_model_path + cache_dir.return_value = mock_model_path - encoder = OpenClipTextualEncoder("ViT-B-32__openai") + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) assert encoder.model_format == ModelFormat.ARMNN @@ -186,120 +77,49 @@ def test_casts_cache_dir_string_to_path(self) -> None: assert encoder.cache_dir == Path(cache_dir) - def test_clear_cache(self, mocker: MockerFixture) -> None: - mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True) - mock_rmtree.avoids_symlink_attacks = True - mock_cache_dir = mocker.Mock() - mock_cache_dir.exists.return_value = True - mock_cache_dir.is_dir.return_value = True - mocker.patch("app.models.base.Path", return_value=mock_cache_dir) - info = mocker.spy(log, "info") - - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir) + def test_clear_cache(self, rmtree: mock.Mock, cache_dir: mock.Mock, info: mock.Mock) -> None: + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) encoder.clear_cache() - mock_rmtree.assert_called_once_with(encoder.cache_dir) + rmtree.assert_called_once_with(encoder.cache_dir) info.assert_called_with(f"Cleared cache directory for model '{encoder.model_name}'.") - def test_clear_cache_warns_if_path_does_not_exist(self, mocker: MockerFixture) -> None: - mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True) - mock_rmtree.avoids_symlink_attacks = True - mock_cache_dir = mocker.Mock() - mock_cache_dir.exists.return_value = False - mock_cache_dir.is_dir.return_value = True - mocker.patch("app.models.base.Path", return_value=mock_cache_dir) - warning = mocker.spy(log, "warning") + def test_clear_cache_warns_if_path_does_not_exist( + self, rmtree: mock.Mock, cache_dir: mock.Mock, warning: mock.Mock + ) -> None: + cache_dir.return_value.exists.return_value = False - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) encoder.clear_cache() - mock_rmtree.assert_not_called() + rmtree.assert_not_called() warning.assert_called_once() - def test_clear_cache_raises_exception_if_vulnerable_to_symlink_attack(self, mocker: MockerFixture) -> None: - mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True) - mock_rmtree.avoids_symlink_attacks = False - mock_cache_dir = mocker.Mock() - mock_cache_dir.exists.return_value = True - mock_cache_dir.is_dir.return_value = True - mocker.patch("app.models.base.Path", return_value=mock_cache_dir) + def test_clear_cache_raises_exception_if_vulnerable_to_symlink_attack(self, rmtree, cache_dir) -> None: + rmtree.avoids_symlink_attacks = False - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) with pytest.raises(RuntimeError): encoder.clear_cache() - mock_rmtree.assert_not_called() + rmtree.assert_not_called() - def test_clear_cache_replaces_file_with_dir_if_path_is_file(self, mocker: MockerFixture) -> None: - mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True) - mock_rmtree.avoids_symlink_attacks = True - mock_cache_dir = mocker.Mock() - mock_cache_dir.exists.return_value = True - mock_cache_dir.is_dir.return_value = False - mocker.patch("app.models.base.Path", return_value=mock_cache_dir) - warning = mocker.spy(log, "warning") + def test_clear_cache_replaces_file_with_dir_if_path_is_file(self, rmtree, cache_dir, warning) -> None: + cache_dir.return_value.is_dir.return_value = False - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) encoder.clear_cache() - mock_rmtree.assert_not_called() - mock_cache_dir.unlink.assert_called_once() - mock_cache_dir.mkdir.assert_called_once() + rmtree.assert_not_called() + cache_dir.return_value.unlink.assert_called_once() + cache_dir.return_value.mkdir.assert_called_once() warning.assert_called_once() - def test_make_session_return_ann_if_available(self, mocker: MockerFixture) -> None: - mock_model_path = mocker.Mock() - mock_model_path.is_file.return_value = True - mock_model_path.suffix = ".armnn" - mock_model_path.with_suffix.return_value = mock_model_path - mock_ann = mocker.patch("app.models.base.AnnSession") - - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - encoder._make_session(mock_model_path) - - mock_ann.assert_called_once() - - def test_make_session_return_ort_if_available_and_ann_is_not(self, mocker: MockerFixture) -> None: - mock_armnn_path = mocker.Mock() - mock_armnn_path.is_file.return_value = False - mock_armnn_path.suffix = ".armnn" - - mock_onnx_path = mocker.Mock() - mock_onnx_path.is_file.return_value = True - mock_onnx_path.suffix = ".onnx" - mock_armnn_path.with_suffix.return_value = mock_onnx_path - - mock_ann = mocker.patch("app.models.base.AnnSession") - mock_ort = mocker.patch("app.models.base.ort.InferenceSession") - - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - encoder._make_session(mock_armnn_path) - - mock_ort.assert_called_once() - mock_ann.assert_not_called() - - def test_make_session_raises_exception_if_path_does_not_exist(self, mocker: MockerFixture) -> None: - mock_model_path = mocker.Mock() - mock_model_path.is_file.return_value = False - mock_model_path.suffix = ".onnx" - mock_model_path.with_suffix.return_value = mock_model_path - mock_ann = mocker.patch("app.models.base.AnnSession") - mock_ort = mocker.patch("app.models.base.ort.InferenceSession") - - encoder = OpenClipTextualEncoder("ViT-B-32__openai") - with pytest.raises(ValueError): - encoder._make_session(mock_model_path) - - mock_ann.assert_not_called() - mock_ort.assert_not_called() - - def test_download(self, mocker: MockerFixture) -> None: - mock_snapshot_download = mocker.patch("app.models.base.snapshot_download") - + def test_download(self, snapshot_download: mock.Mock) -> None: encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir="/path/to/cache") encoder.download() - mock_snapshot_download.assert_called_once_with( + snapshot_download.assert_called_once_with( "immich-app/ViT-B-32__openai", cache_dir=encoder.cache_dir, local_dir=encoder.cache_dir, @@ -307,13 +127,11 @@ def test_download(self, mocker: MockerFixture) -> None: ignore_patterns=["*.armnn"], ) - def test_download_downloads_armnn_if_preferred_format(self, mocker: MockerFixture) -> None: - mock_snapshot_download = mocker.patch("app.models.base.snapshot_download") - + def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None: encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN) encoder.download() - mock_snapshot_download.assert_called_once_with( + snapshot_download.assert_called_once_with( "immich-app/ViT-B-32__openai", cache_dir=encoder.cache_dir, local_dir=encoder.cache_dir, @@ -322,6 +140,113 @@ def test_download_downloads_armnn_if_preferred_format(self, mocker: MockerFixtur ) +@pytest.mark.usefixtures("ort_session") +class TestOrtSession: + CPU_EP = ["CPUExecutionProvider"] + CUDA_EP = ["CUDAExecutionProvider", "CPUExecutionProvider"] + OV_EP = ["OpenVINOExecutionProvider", "CPUExecutionProvider"] + CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"] + TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"] + + @pytest.mark.providers(CPU_EP) + def test_sets_cpu_provider(self, providers: list[str]) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.providers == self.CPU_EP + + @pytest.mark.providers(CUDA_EP) + def test_sets_cuda_provider_if_available(self, providers: list[str]) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.providers == self.CUDA_EP + + @pytest.mark.ov_device_ids(["GPU.0", "CPU"]) + @pytest.mark.providers(OV_EP) + def test_sets_openvino_provider_if_available(self, providers: list[str], ov_device_ids: list[str]) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.providers == self.OV_EP + + @pytest.mark.ov_device_ids(["CPU"]) + @pytest.mark.providers(OV_EP) + def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], ov_device_ids: list[str]) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.providers == self.CPU_EP + + @pytest.mark.providers(CUDA_EP_OUT_OF_ORDER) + def test_sets_providers_in_correct_order(self, providers: list[str]) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.providers == self.CUDA_EP + + @pytest.mark.providers(TRT_EP) + def test_ignores_unsupported_providers(self, providers: list[str]) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.providers == self.CUDA_EP + + def test_sets_provider_kwarg(self) -> None: + providers = ["CUDAExecutionProvider"] + session = OrtSession("ViT-B-32__openai", providers=providers) + + assert session.providers == providers + + @pytest.mark.ov_device_ids(["GPU.0", "CPU"]) + def test_sets_default_provider_options(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"]) + + assert session.provider_options == [ + {"device_type": "GPU_FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"}, + {"arena_extend_strategy": "kSameAsRequested"}, + ] + + def test_sets_provider_options_kwarg(self) -> None: + session = OrtSession( + "ViT-B-32__openai", + providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"], + provider_options=[], + ) + + assert session.provider_options == [] + + def test_sets_default_sess_options(self) -> None: + session = OrtSession("ViT-B-32__openai") + + assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL + assert session.sess_options.inter_op_num_threads == 1 + assert session.sess_options.intra_op_num_threads == 2 + assert session.sess_options.enable_cpu_mem_arena is False + + def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None: + session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + + assert session.sess_options.inter_op_num_threads == 0 + assert session.sess_options.intra_op_num_threads == 0 + + def test_sets_default_sess_options_sets_threads_if_non_cpu_and_set_threads(self, mocker: MockerFixture) -> None: + mock_settings = mocker.patch("app.sessions.ort.settings", autospec=True) + mock_settings.model_inter_op_threads = 2 + mock_settings.model_intra_op_threads = 4 + + session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + + assert session.sess_options.inter_op_num_threads == 2 + assert session.sess_options.intra_op_num_threads == 4 + + def test_sets_sess_options_kwarg(self) -> None: + sess_options = ort.SessionOptions() + session = OrtSession( + "ViT-B-32__openai", + providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"], + provider_options=[], + sess_options=sess_options, + ) + + assert sess_options is session.sess_options + + class TestCLIP: embedding = np.random.rand(512).astype(np.float32) cache_dir = Path("test_cache") diff --git a/machine-learning/pyproject.toml b/machine-learning/pyproject.toml index 6c60b5887b902..8036c93a4226b 100644 --- a/machine-learning/pyproject.toml +++ b/machine-learning/pyproject.toml @@ -97,4 +97,4 @@ line-length = 120 target-version = ['py311'] [tool.pytest.ini_options] -markers = ["providers"] +markers = ["providers", "ov_device_ids"] From 6ff6b9c99949ed0c51c18af51f5d9e97b217c477 Mon Sep 17 00:00:00 2001 From: mertalev <101130780+mertalev@users.noreply.github.com> Date: Sat, 22 Jun 2024 23:21:45 -0400 Subject: [PATCH 3/4] add tests --- machine-learning/ann/ann.py | 18 ++-- machine-learning/app/conftest.py | 20 ++-- machine-learning/app/sessions/ann.py | 24 ++--- machine-learning/app/test_main.py | 145 +++++++++++++++++++++++---- 4 files changed, 158 insertions(+), 49 deletions(-) diff --git a/machine-learning/ann/ann.py b/machine-learning/ann/ann.py index 148d5ba101d75..d3cb8bc821b09 100644 --- a/machine-learning/ann/ann.py +++ b/machine-learning/ann/ann.py @@ -52,8 +52,6 @@ class Ann(metaclass=_Singleton): def __init__(self, log_level: int = 3, tuning_level: int = 1, tuning_file: str | None = None) -> None: if not is_available: raise RuntimeError("libann is not available!") - if tuning_file and not exists(tuning_file): - raise ValueError("tuning_file must point to an existing (possibly empty) file!") if tuning_level == 0 and tuning_file is None: raise ValueError("tuning_level == 0 reads existing tuning information and requires a tuning_file") if tuning_level < 0 or tuning_level > 3: @@ -67,6 +65,12 @@ def __init__(self, log_level: int = 3, tuning_level: int = 1, tuning_file: str | self.input_shapes: dict[int, tuple[tuple[int], ...]] = {} self.ann: int | None = None self.new() + + if self.tuning_file is not None: + # make sure tuning file exists (without clearing contents) + # once filled, the tuning file reduces the cost/time of the first + # inference after model load by 10s of seconds + open(self.tuning_file, "a").close() def new(self) -> None: if self.ann is None: @@ -95,17 +99,19 @@ def load( model_path: str, fast_math: bool = True, fp16: bool = False, - save_cached_network: bool = False, cached_network_path: str | None = None, ) -> int: if not model_path.endswith((".armnn", ".tflite", ".onnx")): raise ValueError("model_path must be a file with extension .armnn, .tflite or .onnx") if not exists(model_path): raise ValueError("model_path must point to an existing file!") + + save_cached_network = False if cached_network_path is not None and not exists(cached_network_path): - raise ValueError("cached_network_path must point to an existing (possibly empty) file!") - if save_cached_network and cached_network_path is None: - raise ValueError("save_cached_network is True, cached_network_path must be specified!") + save_cached_network = True + # create empty model cache file + open(cached_network_path, "a").close() + net_id: int = libann.load( self.ann, model_path.encode(), diff --git a/machine-learning/app/conftest.py b/machine-learning/app/conftest.py index f5e3f95cc858c..d40729c61a5a5 100644 --- a/machine-learning/app/conftest.py +++ b/machine-learning/app/conftest.py @@ -129,6 +129,12 @@ def ort_session() -> Iterator[mock.Mock]: yield mocked +@pytest.fixture(scope="function") +def ann_session() -> Iterator[mock.Mock]: + with mock.patch("app.sessions.ann.Ann") as mocked: + yield mocked + + @pytest.fixture(scope="function") def rmtree() -> Iterator[mock.Mock]: with mock.patch("app.models.base.rmtree", autospec=True) as mocked: @@ -137,13 +143,15 @@ def rmtree() -> Iterator[mock.Mock]: @pytest.fixture(scope="function") -def cache_dir() -> Iterator[mock.Mock]: - mock_cache_dir = mock.MagicMock() - mock_cache_dir.exists.return_value = True - mock_cache_dir.is_dir.return_value = True - mock_cache_dir.is_file.return_value = True +def path() -> Iterator[mock.Mock]: + path = mock.MagicMock() + path.exists.return_value = True + path.is_dir.return_value = True + path.is_file.return_value = True + path.with_suffix.return_value = path + path.return_value = path - with mock.patch("app.models.base.Path", return_value=mock_cache_dir) as mocked: + with mock.patch("app.models.base.Path", return_value=path) as mocked: yield mocked diff --git a/machine-learning/app/sessions/ann.py b/machine-learning/app/sessions/ann.py index 57da57823a8e6..618d6e99290a4 100644 --- a/machine-learning/app/sessions/ann.py +++ b/machine-learning/app/sessions/ann.py @@ -17,27 +17,15 @@ class AnnSession: Wrapper for ANN to be drop-in replacement for ONNX session. """ - def __init__(self, model_path: Path): - tuning_file = Path(settings.cache_folder) / "gpu-tuning.ann" - with tuning_file.open(mode="a"): - # make sure tuning file exists (without clearing contents) - # once filled, the tuning file reduces the cost/time of the first - # inference after model load by 10s of seconds - pass - self.ann = Ann(tuning_level=3, tuning_file=tuning_file.as_posix()) - log.info("Loading ANN model %s ...", model_path) - cache_file = model_path.with_suffix(".anncache") - save = False - if not cache_file.is_file(): - save = True - with cache_file.open(mode="a"): - # create empty model cache file - pass + def __init__(self, model_path: Path, cache_dir: Path = settings.cache_folder) -> None: + self.model_path = model_path + self.cache_dir = cache_dir + self.ann = Ann(tuning_level=3, tuning_file=(cache_dir / "gpu-tuning.ann").as_posix()) + log.info("Loading ANN model %s ...", model_path) self.model = self.ann.load( model_path.as_posix(), - save_cached_network=save, - cached_network_path=cache_file.as_posix(), + cached_network_path=model_path.with_suffix(".anncache").as_posix(), ) log.info("Loaded ANN model with ID %d", self.model) diff --git a/machine-learning/app/test_main.py b/machine-learning/app/test_main.py index eff2f4c40f883..d798951372146 100644 --- a/machine-learning/app/test_main.py +++ b/machine-learning/app/test_main.py @@ -9,6 +9,7 @@ import cv2 import numpy as np +import onnx import onnxruntime as ort import pytest from fastapi import HTTPException @@ -22,6 +23,7 @@ from app.models.clip.visual import OpenClipVisualEncoder from app.models.facial_recognition.detection import FaceDetector from app.models.facial_recognition.recognition import FaceRecognizer +from app.sessions.ann import AnnSession from app.sessions.ort import OrtSession from .config import Settings, settings @@ -50,16 +52,12 @@ def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: assert encoder.model_format == ModelFormat.ONNX - def test_sets_default_preferred_format_to_armnn_if_available(self, cache_dir, mocker: MockerFixture) -> None: + def test_sets_default_preferred_format_to_armnn_if_available(self, path: mock.Mock, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) mocker.patch("ann.ann.is_available", True) - mock_model_path = mocker.MagicMock() - mock_model_path.is_file.return_value = True - mock_model_path.suffix = ".armnn" - mock_model_path.with_suffix.return_value = mock_model_path - cache_dir.return_value = mock_model_path + path.suffix = ".armnn" - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) assert encoder.model_format == ModelFormat.ARMNN @@ -77,42 +75,46 @@ def test_casts_cache_dir_string_to_path(self) -> None: assert encoder.cache_dir == Path(cache_dir) - def test_clear_cache(self, rmtree: mock.Mock, cache_dir: mock.Mock, info: mock.Mock) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) + def test_clear_cache(self, rmtree: mock.Mock, path: mock.Mock, info: mock.Mock) -> None: + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) encoder.clear_cache() rmtree.assert_called_once_with(encoder.cache_dir) info.assert_called_with(f"Cleared cache directory for model '{encoder.model_name}'.") def test_clear_cache_warns_if_path_does_not_exist( - self, rmtree: mock.Mock, cache_dir: mock.Mock, warning: mock.Mock + self, rmtree: mock.Mock, path: mock.Mock, warning: mock.Mock ) -> None: - cache_dir.return_value.exists.return_value = False + path.return_value.exists.return_value = False - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) encoder.clear_cache() rmtree.assert_not_called() warning.assert_called_once() - def test_clear_cache_raises_exception_if_vulnerable_to_symlink_attack(self, rmtree, cache_dir) -> None: + def test_clear_cache_raises_exception_if_vulnerable_to_symlink_attack( + self, rmtree: mock.Mock, path: mock.Mock + ) -> None: rmtree.avoids_symlink_attacks = False - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) with pytest.raises(RuntimeError): encoder.clear_cache() rmtree.assert_not_called() - def test_clear_cache_replaces_file_with_dir_if_path_is_file(self, rmtree, cache_dir, warning) -> None: - cache_dir.return_value.is_dir.return_value = False + def test_clear_cache_replaces_file_with_dir_if_path_is_file( + self, rmtree: mock.Mock, path: mock.Mock, warning: mock.Mock + ) -> None: + path.return_value.is_dir.return_value = False - encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) encoder.clear_cache() rmtree.assert_not_called() - cache_dir.return_value.unlink.assert_called_once() - cache_dir.return_value.mkdir.assert_called_once() + path.return_value.unlink.assert_called_once() + path.return_value.mkdir.assert_called_once() warning.assert_called_once() def test_download(self, snapshot_download: mock.Mock) -> None: @@ -247,6 +249,60 @@ def test_sets_sess_options_kwarg(self) -> None: assert sess_options is session.sess_options +class TestAnnSession: + def test_creates_ann_session(self, ann_session: mock.Mock, info: mock.Mock) -> None: + model_path = mock.MagicMock(spec=Path) + cache_dir = mock.MagicMock(spec=Path) + + AnnSession(model_path, cache_dir) + + ann_session.assert_called_once_with(tuning_level=3, tuning_file=(cache_dir / "gpu-tuning.ann").as_posix()) + ann_session.return_value.load.assert_called_once_with( + model_path.as_posix(), cached_network_path=model_path.with_suffix(".anncache").as_posix() + ) + info.assert_has_calls( + [ + mock.call("Loading ANN model %s ...", model_path), + mock.call("Loaded ANN model with ID %d", ann_session.return_value.load.return_value), + ] + ) + + def test_get_inputs(self, ann_session: mock.Mock) -> None: + ann_session.return_value.load.return_value = 123 + ann_session.return_value.input_shapes = {123: [(1, 3, 224, 224)]} + session = AnnSession(Path("ViT-B-32__openai")) + + inputs = session.get_inputs() + + assert len(inputs) == 1 + assert inputs[0].name is None + assert inputs[0].shape == (1, 3, 224, 224) + + def test_get_outputs(self, ann_session: mock.Mock) -> None: + ann_session.return_value.load.return_value = 123 + ann_session.return_value.output_shapes = {123: [(1, 3, 224, 224)]} + session = AnnSession(Path("ViT-B-32__openai")) + + outputs = session.get_outputs() + + assert len(outputs) == 1 + assert outputs[0].name is None + assert outputs[0].shape == (1, 3, 224, 224) + + def test_run(self, ann_session: mock.Mock, mocker: MockerFixture) -> None: + ann_session.return_value.load.return_value = 123 + np_spy = mocker.spy(np, "ascontiguousarray") + session = AnnSession(Path("ViT-B-32__openai")) + input1, input2 = np.random.rand(1, 3, 224, 224), np.random.rand(1, 3, 224, 224) + input_feed = {"input.1": input1, "input.2": input2} + + session.run(None, input_feed) + + ann_session.return_value.execute.assert_called_once_with(123, [input1, input2]) + np_spy.call_count == 2 + np_spy.assert_has_calls([mock.call(input1), mock.call(input2)]) + + class TestCLIP: embedding = np.random.rand(512).astype(np.float32) cache_dir = Path("test_cache") @@ -412,6 +468,57 @@ def test_recognition(self, cv_image: cv2.Mat, mocker: MockerFixture) -> None: assert isinstance(call_args[0][0], np.ndarray) assert call_args[0][0].shape == (112, 112, 3) + def test_recognition_adds_batch_axis_for_ort(self, ort_session, mocker: MockerFixture) -> None: + onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) + update_dims = mocker.patch( + "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True + ) + mocker.patch("app.models.base.InferenceModel.download") + mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") + + ort_session.return_value.get_inputs.return_value = [SimpleNamespace(name="input.1", shape=(1, 3, 224, 224))] + ort_session.return_value.get_outputs.return_value = [SimpleNamespace(name="output.1", shape=(1, 800))] + + proto = mock.Mock() + + input_dims = mock.Mock() + input_dims.name = "input.1" + input_dims.type.tensor_type.shape.dim = [SimpleNamespace(dim_value=size) for size in [1, 3, 224, 224]] + proto.graph.input = [input_dims] + + output_dims = mock.Mock() + output_dims.name = "output.1" + output_dims.type.tensor_type.shape.dim = [SimpleNamespace(dim_value=size) for size in [1, 800]] + proto.graph.output = [output_dims] + + onnx.load.return_value = proto + + face_recognizer = FaceRecognizer("buffalo_s") + face_recognizer.load() + + assert face_recognizer.batch is True + update_dims.assert_called_once_with(proto, {"input.1": ["batch", 3, 224, 224]}, {"output.1": ["batch", 800]}) + onnx.save.assert_called_once_with(update_dims.return_value, face_recognizer.model_path) + + def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session, mocker: MockerFixture) -> None: + onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) + update_dims = mocker.patch( + "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True + ) + mocker.patch("app.models.base.InferenceModel.download") + mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") + + ort_session.return_value.get_inputs.return_value = [SimpleNamespace(name="input.1", shape=('batch', 3, 224, 224))] + ort_session.return_value.get_outputs.return_value = [SimpleNamespace(name="output.1", shape=('batch', 800))] + + face_recognizer = FaceRecognizer("buffalo_s") + face_recognizer.load() + + assert face_recognizer.batch is True + update_dims.assert_not_called() + onnx.load.assert_not_called() + onnx.save.assert_not_called() + @pytest.mark.asyncio class TestCache: From 1eac415948b2b4306636f8b6868daf8979762841 Mon Sep 17 00:00:00 2001 From: mertalev <101130780+mertalev@users.noreply.github.com> Date: Sat, 22 Jun 2024 23:33:21 -0400 Subject: [PATCH 4/4] fix typing linting formatting --- machine-learning/app/conftest.py | 5 +++-- machine-learning/app/models/base.py | 3 ++- machine-learning/app/test_main.py | 15 ++++++++------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/machine-learning/app/conftest.py b/machine-learning/app/conftest.py index d40729c61a5a5..9548ad01482a6 100644 --- a/machine-learning/app/conftest.py +++ b/machine-learning/app/conftest.py @@ -8,9 +8,10 @@ from numpy.typing import NDArray from PIL import Image -from .main import app from app.config import log +from .main import app + @pytest.fixture def pil_image() -> Image.Image: @@ -150,7 +151,7 @@ def path() -> Iterator[mock.Mock]: path.is_file.return_value = True path.with_suffix.return_value = path path.return_value = path - + with mock.patch("app.models.base.Path", return_value=path) as mocked: yield mocked diff --git a/machine-learning/app/models/base.py b/machine-learning/app/models/base.py index a4e2b13c72599..8f115c77f131c 100644 --- a/machine-learning/app/models/base.py +++ b/machine-learning/app/models/base.py @@ -32,7 +32,8 @@ def __init__( self.model_name = clean_name(model_name) self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default self.model_format = preferred_format if preferred_format is not None else self._model_format_default - self.session = session + if session is not None: + self.session = session def download(self) -> None: if not self.cached: diff --git a/machine-learning/app/test_main.py b/machine-learning/app/test_main.py index d798951372146..a424c8236a4db 100644 --- a/machine-learning/app/test_main.py +++ b/machine-learning/app/test_main.py @@ -9,7 +9,6 @@ import cv2 import numpy as np -import onnx import onnxruntime as ort import pytest from fastapi import HTTPException @@ -293,7 +292,7 @@ def test_run(self, ann_session: mock.Mock, mocker: MockerFixture) -> None: ann_session.return_value.load.return_value = 123 np_spy = mocker.spy(np, "ascontiguousarray") session = AnnSession(Path("ViT-B-32__openai")) - input1, input2 = np.random.rand(1, 3, 224, 224), np.random.rand(1, 3, 224, 224) + [input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)] input_feed = {"input.1": input1, "input.2": input2} session.run(None, input_feed) @@ -468,7 +467,7 @@ def test_recognition(self, cv_image: cv2.Mat, mocker: MockerFixture) -> None: assert isinstance(call_args[0][0], np.ndarray) assert call_args[0][0].shape == (112, 112, 3) - def test_recognition_adds_batch_axis_for_ort(self, ort_session, mocker: MockerFixture) -> None: + def test_recognition_adds_batch_axis_for_ort(self, ort_session: mock.Mock, mocker: MockerFixture) -> None: onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) update_dims = mocker.patch( "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True @@ -499,8 +498,8 @@ def test_recognition_adds_batch_axis_for_ort(self, ort_session, mocker: MockerFi assert face_recognizer.batch is True update_dims.assert_called_once_with(proto, {"input.1": ["batch", 3, 224, 224]}, {"output.1": ["batch", 800]}) onnx.save.assert_called_once_with(update_dims.return_value, face_recognizer.model_path) - - def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session, mocker: MockerFixture) -> None: + + def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session: mock.Mock, mocker: MockerFixture) -> None: onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) update_dims = mocker.patch( "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True @@ -508,8 +507,10 @@ def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session, mocker mocker.patch("app.models.base.InferenceModel.download") mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") - ort_session.return_value.get_inputs.return_value = [SimpleNamespace(name="input.1", shape=('batch', 3, 224, 224))] - ort_session.return_value.get_outputs.return_value = [SimpleNamespace(name="output.1", shape=('batch', 800))] + inputs = [SimpleNamespace(name="input.1", shape=("batch", 3, 224, 224))] + outputs = [SimpleNamespace(name="output.1", shape=("batch", 800))] + ort_session.return_value.get_inputs.return_value = inputs + ort_session.return_value.get_outputs.return_value = outputs face_recognizer = FaceRecognizer("buffalo_s") face_recognizer.load()