feat(ml): introduce support of onnxruntime-rocm for AMD GPU
Zelnes authored and mertalev committed Dec 19, 2024
1 parent 79a780e commit 46c505a
Showing 14 changed files with 270 additions and 76 deletions.
36 changes: 19 additions & 17 deletions .github/workflows/docker.yml
@@ -48,21 +48,21 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: ["", "-cuda", "-openvino", "-armnn"]
suffix: ['', '-cuda', '-openvino', '-armnn']
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Re-tag image
run: |
REGISTRY_NAME="ghcr.io"
REPOSITORY=${{ github.repository_owner }}/immich-machine-learning
TAG_OLD=main${{ matrix.suffix }}
TAG_NEW=${{ github.event.number == 0 && github.ref_name || format('pr-{0}', github.event.number) }}${{ matrix.suffix }}
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_NEW $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
retag_server:
name: Re-Tag Server
@@ -71,7 +71,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: [""]
suffix: ['']
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
@@ -87,7 +87,6 @@ jobs:
TAG_NEW=${{ github.event.number == 0 && github.ref_name || format('pr-{0}', github.event.number) }}${{ matrix.suffix }}
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_NEW $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
build_and_push_ml:
name: Build and Push ML
needs: pre-job
@@ -109,6 +108,10 @@ jobs:
device: cuda
suffix: -cuda

- platforms: linux/amd64
device: rocm
suffix: -rocm

- platforms: linux/amd64
device: openvino
suffix: -openvino
@@ -192,7 +195,6 @@ jobs:
BUILD_SOURCE_REF=${{ github.ref_name }}
BUILD_SOURCE_COMMIT=${{ github.sha }}
build_and_push_server:
name: Build and Push Server
runs-on: ubuntu-latest
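Concretely, for a hypothetical PR number 1234 and the `-cuda` suffix, the re-tag step above would expand to roughly:

```bash
docker buildx imagetools create \
  -t ghcr.io/immich-app/immich-machine-learning:pr-1234-cuda \
  ghcr.io/immich-app/immich-machine-learning:main-cuda
```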
4 changes: 2 additions & 2 deletions docker/docker-compose.dev.yml
@@ -85,12 +85,12 @@ services:
image: immich-machine-learning-dev:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, rocm, openvino, openvino-wsl] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, rocm, openvino, openvino-wsl] for accelerated inference
ports:
- 3003:3003
volumes:
4 changes: 2 additions & 2 deletions docker/docker-compose.prod.yml
@@ -29,12 +29,12 @@ services:
image: immich-machine-learning:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, rocm, openvino, openvino-wsl] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, rocm, openvino, openvino-wsl] for accelerated inference
ports:
- 3003:3003
volumes:
4 changes: 2 additions & 2 deletions docker/docker-compose.yml
@@ -32,12 +32,12 @@ services:

immich-machine-learning:
container_name: immich_machine_learning
# For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
# For hardware acceleration, add one of -[armnn, cuda, rocm, openvino] to the image tag.
# Example tag: ${IMMICH_VERSION:-release}-cuda
image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
# extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/ml-hardware-acceleration
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
# service: cpu # set to one of [armnn, cuda, rocm, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
volumes:
- model-cache:/cache
env_file:
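For reference, enabling the new ROCm backend in `docker-compose.yml` amounts to the two commented steps above — a minimal sketch, assuming the `release` image tag:

```yaml
immich-machine-learning:
  container_name: immich_machine_learning
  # the -rocm suffix selects the image built with the ROCm-enabled onnxruntime
  image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}-rocm
  extends:
    file: hwaccel.ml.yml
    service: rocm
  volumes:
    - model-cache:/cache
```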
7 changes: 7 additions & 0 deletions docker/hwaccel.ml.yml
@@ -26,6 +26,13 @@ services:
capabilities:
- gpu

rocm:
group_add:
- video
devices:
- /dev/dri:/dev/dri
- /dev/kfd:/dev/kfd

openvino:
device_cgroup_rules:
- 'c 189:* rmw'
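Outside Compose, the pass-through the `rocm` service declares can be expressed directly with `docker run` — a sketch, assuming the `-rocm` image tag built by the workflow above:

```bash
docker run --group-add video \
  --device /dev/kfd --device /dev/dri \
  ghcr.io/immich-app/immich-machine-learning:release-rocm
```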
9 changes: 7 additions & 2 deletions docs/docs/features/ml-hardware-acceleration.md
@@ -11,6 +11,7 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration

- ARM NN (Mali)
- CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher)
- ROCm (AMD GPUs)
- OpenVINO (Intel discrete GPUs such as Iris Xe and Arc)

## Limitations
@@ -41,6 +42,10 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration
- The installed driver must be >= 535 (it must support CUDA 12.2).
- On Linux (except for WSL2), you also need to have [NVIDIA Container Toolkit][nvct] installed.

#### ROCm

- The GPU must be supported by ROCm (if it is not, you can try setting `HSA_OVERRIDE_GFX_VERSION=<a supported version, e.g. 10.3.0>`)

#### OpenVINO

- The server must have a discrete GPU, i.e. Iris Xe or Arc. Expect issues when attempting to use integrated graphics.
@@ -50,12 +55,12 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration

1. If you do not already have it, download the latest [`hwaccel.ml.yml`][hw-file] file and ensure it's in the same folder as the `docker-compose.yml`.
2. In the `docker-compose.yml` under `immich-machine-learning`, uncomment the `extends` section and change `cpu` to the appropriate backend.
3. Still in `immich-machine-learning`, add one of -[armnn, cuda, openvino] to the `image` section's tag at the end of the line.
3. Still in `immich-machine-learning`, add one of -[armnn, cuda, rocm, openvino] to the `image` section's tag at the end of the line.
4. Redeploy the `immich-machine-learning` container with these updated settings.

### Confirming Device Usage

You can confirm the device is being recognized and used by checking its utilization. There are many tools to display this, such as `nvtop` for NVIDIA or Intel and `intel_gpu_top` for Intel.
You can confirm the device is being recognized and used by checking its utilization. There are many tools to display this, such as `nvtop` for NVIDIA or Intel, `intel_gpu_top` for Intel, and `radeontop` for AMD.

You can also check the logs of the `immich-machine-learning` container. When a Smart Search or Face Detection job begins, or when you search with text in Immich, you should either see a log for `Available ORT providers` containing the relevant provider (e.g. `CUDAExecutionProvider` in the case of CUDA), or a `Loaded ANN model` log entry without errors in the case of ARM NN.

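A quick way to confirm the provider from inside the `immich-machine-learning` container is to ask onnxruntime directly — a minimal sketch, assuming the ROCm wheel built in the Dockerfile below is installed:

```python
import onnxruntime as ort

# A ROCm-enabled build should list ROCMExecutionProvider here;
# a CPU-only build would show only CPUExecutionProvider.
print(ort.get_available_providers())
```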
4 changes: 2 additions & 2 deletions docs/docs/guides/remote-machine-learning.md
@@ -23,12 +23,12 @@ name: immich_remote_ml
services:
immich-machine-learning:
container_name: immich_machine_learning
# For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
# For hardware acceleration, add one of -[armnn, cuda, rocm, openvino] to the image tag.
# Example tag: ${IMMICH_VERSION:-release}-cuda
image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
# extends:
# file: hwaccel.ml.yml
# service: # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
# service: # set to one of [armnn, cuda, rocm, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
volumes:
- model-cache:/cache
restart: always
43 changes: 42 additions & 1 deletion machine-learning/Dockerfile
@@ -15,6 +15,40 @@ RUN mkdir /opt/armnn && \
cd /opt/ann && \
sh build.sh

# Warning: 26.3 GB of disk space is required to pull this image
# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder-rocm

WORKDIR /code

RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
# Install same version as the Dockerfile provided by onnxruntime
RUN wget https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \
chmod +x cmake-3.27.3-linux-x86_64.sh && \
mkdir -p /code/cmake-3.27.3-linux-x86_64 && \
./cmake-3.27.3-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.27.3-linux-x86_64 && \
rm cmake-3.27.3-linux-x86_64.sh

ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}

# Prepare onnxruntime repository & build onnxruntime
RUN git clone --single-branch --branch v1.18.1 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
WORKDIR /code/onnxruntime
# EDIT PR
# While the PR below is still open, we need to build with its patch applied:
# https://github.com/microsoft/onnxruntime/pull/19567
COPY ./rocm-PR19567.patch /tmp/
RUN git apply /tmp/rocm-PR19567.patch
# END EDIT PR
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
# Note: the build can fail when over-parallelized.
# With 12 threads, building onnxruntime needs more than 16 GB of RAM (all the RAM on my machine),
# and even then the compilation ran for more than 1.5 hours before failing.
# Lowering the thread count to 8 let the build complete.
RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 9 --cmake_extra_defines\
ONNXRUNTIME_VERSION=1.18.1 --use_rocm --rocm_home=/opt/rocm
RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/

FROM builder-${DEVICE} AS builder

ARG DEVICE
@@ -32,6 +66,9 @@ RUN poetry config installer.max-workers 10 && \
RUN python3 -m venv /opt/venv

COPY poetry.lock pyproject.toml ./
RUN if [ "$DEVICE" = "rocm" ]; then \
poetry add /opt/onnxruntime_rocm-*.whl; \
fi
RUN poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev

FROM python:3.11-slim-bookworm@sha256:370c586a6ffc8c619e6d652f81c094b34b14b8f2fb9251f092de23f16e299b78 AS prod-cpu
@@ -80,11 +117,15 @@ COPY --from=builder-armnn \
/opt/ann/build.sh \
/opt/armnn/

FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS prod-rocm


FROM prod-${DEVICE} AS prod

ARG DEVICE

RUN apt-get update && \
apt-get install -y --no-install-recommends tini $(if ! [ "$DEVICE" = "openvino" ]; then echo "libmimalloc2.0"; fi) && \
apt-get install -y --no-install-recommends tini $(if ! [ "$DEVICE" = "openvino" ] && ! [ "$DEVICE" = "rocm" ]; then echo "libmimalloc2.0"; fi) && \
apt-get autoremove -yqq && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
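Building the ROCm flavour locally would then look roughly like this (illustrative — the tag name is arbitrary, and note the disk and RAM caveats in the comments above):

```bash
cd machine-learning
docker build --build-arg DEVICE=rocm -t immich-machine-learning:rocm .
```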
4 changes: 2 additions & 2 deletions machine-learning/README.md
@@ -7,7 +7,7 @@

This project uses [Poetry](https://python-poetry.org/docs/#installation), so be sure to install it first.
Running `poetry install --no-root --with dev --with cpu` will install everything you need in an isolated virtual environment.
CUDA and OpenVINO are supported as acceleration APIs. To use them, you can replace `--with cpu` with either of `--with cuda` or `--with openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.
CUDA, ROCm, and OpenVINO are supported as acceleration APIs. To use them, you can replace `--with cpu` with one of `--with cuda`, `--with rocm`, or `--with openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.

To add or remove dependencies, you can use the commands `poetry add $PACKAGE_NAME` and `poetry remove $PACKAGE_NAME`, respectively.
Be sure to commit the `poetry.lock` and `pyproject.toml` files with `poetry lock --no-update` to reflect any changes in dependencies.
@@ -37,4 +37,4 @@ This project utilizes facial recognition models from the [InsightFace](https://g
## License and Use Restrictions
We have received permission to use the InsightFace facial recognition models in our project, as granted via email by Jia Guo (guojia@insightface.ai) on 18th March 2023. However, it's important to note that this permission does not extend to the redistribution or commercial use of their models by third parties. Users and developers interested in using these models should review the licensing terms provided in the InsightFace GitHub repository.

For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
2 changes: 1 addition & 1 deletion machine-learning/app/models/constants.py
@@ -63,7 +63,7 @@
}


SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]
SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "ROCMExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]


def get_model_source(model_name: str) -> ModelSource | None:
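The order of `SUPPORTED_PROVIDERS` reads as a priority list, so session code would presumably intersect it with what the installed onnxruntime actually ships — a sketch of that assumption:

```python
import onnxruntime as ort

SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "ROCMExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]

# Keep only the providers this onnxruntime build offers, preserving priority order.
available = set(ort.get_available_providers())
providers = [p for p in SUPPORTED_PROVIDERS if p in available]
print(providers)  # e.g. ['ROCMExecutionProvider', 'CPUExecutionProvider'] on the ROCm image
```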
2 changes: 1 addition & 1 deletion machine-learning/app/sessions/ort.py
@@ -88,7 +88,7 @@ def _provider_options_default(self) -> list[dict[str, Any]]:
match provider:
case "CPUExecutionProvider":
options = {"arena_extend_strategy": "kSameAsRequested"}
case "CUDAExecutionProvider":
case "CUDAExecutionProvider" | "ROCMExecutionProvider":
options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
case "OpenVINOExecutionProvider":
options = {
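Taken together, the shared `CUDAExecutionProvider | ROCMExecutionProvider` arm implies a session built roughly like the following — illustrative only: the model path and device id are placeholders, and the ROCm provider is assumed to accept the same options as CUDA, as the shared match arm suggests:

```python
import onnxruntime as ort

session = ort.InferenceSession(
    "model.onnx",  # placeholder path
    providers=["ROCMExecutionProvider", "CPUExecutionProvider"],
    provider_options=[
        # options the shared CUDA/ROCm arm sets (device id assumed to be 0)
        {"arena_extend_strategy": "kSameAsRequested", "device_id": "0"},
        # options the CPUExecutionProvider arm sets
        {"arena_extend_strategy": "kSameAsRequested"},
    ],
)
```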