# libs/infinity_emb/Dockerfile.jinja2

# Autogenerated warning:
# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd

FROM {{ base_image }} AS base

# Environment inherited by every later stage (all of them derive from `base`).
ENV PYTHONUNBUFFERED=1 \
    # pip settings
    PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    # Poetry settings: with both template defaults left at 'true', the virtual
    # environment is created in the project's root and named `.venv`
    POETRY_VIRTUALENVS_CREATE="{{poetry_virtualenvs_create | default('true')}}" \
    POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \
    POETRY_NO_INTERACTION=1 \
    # Hugging Face Hub settings
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # Poetry extras to install and the Python interpreter name used by later steps
    EXTRAS="{{poetry_extras | default('all')}}" \
    PYTHON="{{python_version | default('python3.11')}}"
# System packages: build toolchain, Python headers, libsndfile, the selected
# interpreter with its venv module, and curl.
# NOTE(review): the apt package lists are intentionally left in place here;
# template-injected steps in later stages may rely on them - confirm before
# adding an `rm -rf /var/lib/apt/lists/*` cleanup.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        python3-dev \
        libsndfile1 \
        ${PYTHON}-venv \
        ${PYTHON} \
        curl
# Optional extra instructions supplied by the template configuration (empty by default)
{{extra_env_variables | default('')}}
WORKDIR /app

FROM base AS builder
# Poetry installer settings; both can be overridden with --build-arg.
#   POETRY_VERSION: version of Poetry to install (default is 1.8.4)
#   POETRY_HOME:    directory to install Poetry to (default is /opt/poetry)
ARG POETRY_VERSION=1.8.4
ARG POETRY_HOME=/opt/poetry
# Download the Poetry installer to a file and run it with $PYTHON.
# Downloading first, instead of piping curl into the interpreter, lets a failed
# download fail this step: -f makes curl exit non-zero on HTTP errors and the
# && chain stops before the interpreter is started.
RUN curl -fsSL https://install.python-poetry.org -o /tmp/install-poetry.py \
    && POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON /tmp/install-poetry.py \
    && rm -f /tmp/install-poetry.py
ENV PATH=$POETRY_HOME/bin:$PATH
# Test if Poetry is installed in the expected path
RUN echo "Poetry version:" && poetry --version
# Copy only the dependency manifests first, so the dependency-install layers
# below are rebuilt only when one of these files changes
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Install dependencies only
# (pyproject_sed is an optional template-provided step, a bare comment by
# default - presumably a patch of pyproject.toml; verify in the template config)
{{pyproject_sed | default('#')}}
{{main_install}}
# Copy the package source (this layer and the ones after it are rebuilt whenever the source code changes)
COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
{{main_install|replace("--no-root","")}}
{{extra_installs_main | default('#')}}


FROM builder AS testing
# install lint and test dependencies
{{main_install|replace("--without", "--with")|replace("--no-root","")}}
# lint
RUN poetry run ruff check .
RUN poetry run mypy .
# pytest
COPY tests tests
# Only a subset of the end-to-end tests runs here because of the duration of the build in GitHub CI.
# linux/arm64 runs the dummy-model API test only; any other platform also runs the
# sentence-transformers tests, excluding those marked "performance".
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
# The automatic platform build args are only visible inside a stage after they are
# redeclared. Without this ARG, $TARGETPLATFORM is always empty in the RUN below.
# The uname-based detection is kept as a fallback for builders that do not set it.
ARG TARGETPLATFORM
RUN if [ -z "$TARGETPLATFORM" ]; then \
      ARCH=$(uname -m); \
      if [ "$ARCH" = "x86_64" ]; then \
          TARGETPLATFORM="linux/amd64"; \
      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
          TARGETPLATFORM="linux/arm64"; \
      else \
          echo "Unsupported architecture: $ARCH"; exit 1; \
      fi; \
    fi; \
    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py tests/end_to_end/test_sentence_transformers.py  -m "not performance" -x ; \
    fi
# Marker file written only after the steps above succeeded; later stages copy it
RUN echo "all tests passed" > "test_results.txt"


# Common parent of the production stages: a fresh `base` image plus the /app tree from `builder`
FROM base AS tested-builder
COPY --from=builder /app /app
# Copying a file produced by the `testing` stage makes this stage depend on it,
# which forces the lint and test steps to run before a production image is built
COPY --from=testing /app/test_results.txt /app/test_results.txt
# Keep the Hugging Face cache under /app and put the project's .venv first on PATH
ENV HF_HOME=/app/.cache/huggingface \
    PATH=/app/.venv/bin:$PATH

# Production image with the model preloaded at build time. Example:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t michaelf34/infinity:0.0.71-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# Both build args are required; the build stops with an error if either one is empty
ARG MODEL_NAME
ARG ENGINE
RUN [ -n "${MODEL_NAME}" ] || { echo "Error: Build argument MODEL_NAME not set." && exit 1; }; \
    [ -n "${ENGINE}" ] || { echo "Error: Build argument ENGINE not set." && exit 1; }

# Persist the build args into the image environment
ENV INFINITY_MODEL_ID=$MODEL_NAME \
    INFINITY_ENGINE=$ENGINE
# The preload run exits with 3 once the model is downloaded, so exit code 3 is
# accepted as success here # TODO: better exit code
RUN infinity_emb v2 --preload-only --no-model-warmup || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version (no model preload step in this stage)
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]