diff --git a/.github/workflows/docker_release.yml b/.github/workflows/docker_release.yml
index b48a82e3ee..119d498fc6 100644
--- a/.github/workflows/docker_release.yml
+++ b/.github/workflows/docker_release.yml
@@ -19,6 +19,7 @@ jobs:
       matrix:
         target:
           - "cpu"
+          - "cpu-remote-inference"
           - "gpu"
 
     steps:
@@ -72,6 +73,34 @@ jobs:
           # Remove image after test to avoid filling the GitHub runner and prevent its failure
           docker rmi "deepset/haystack:$TAG"
 
+      - name: Test non-inference image
+        if: contains(matrix.target, 'inference') != true
+        run: |
+          TAG="base-${{ matrix.target }}-${{ steps.meta.outputs.version }}"
+
+          # grep exits non-zero when no package matches, which would fail the step; the fallback echo guarantees a non-empty result
+          PLATFORM="linux/amd64"
+          TORCH_INSTALLED=$(docker run --platform "$PLATFORM" --rm "deepset/haystack:$TAG" pip list | grep torch || echo 'not found')
+          [[ "$TORCH_INSTALLED" != "not found" ]] || echo "::error::PyTorch is not installed in deepset/haystack:$TAG image for $PLATFORM"
+
+          PLATFORM="linux/arm64"
+          TORCH_INSTALLED=$(docker run --platform "$PLATFORM" --rm "deepset/haystack:$TAG" pip list | grep torch || echo 'not found')
+          [[ "$TORCH_INSTALLED" != "not found" ]] || echo "::error::PyTorch is not installed in deepset/haystack:$TAG image for $PLATFORM"
+
+      - name: Test inference image
+        if: contains(matrix.target, 'inference')
+        run: |
+          TAG="base-${{ matrix.target }}-${{ steps.meta.outputs.version }}"
+
+          # grep exits non-zero when no package matches, which would fail the step; the fallback echo guarantees a non-empty result
+          PLATFORM="linux/amd64"
+          TORCH_INSTALLED=$(docker run --platform "$PLATFORM" --rm "deepset/haystack:$TAG" sh -c "pip list | grep torch || echo 'not found'")
+          [[ "$TORCH_INSTALLED" == "not found" ]] || echo "::error::PyTorch is installed in deepset/haystack:$TAG image for $PLATFORM"
+
+          PLATFORM="linux/arm64"
+          TORCH_INSTALLED=$(docker run --platform "$PLATFORM" --rm "deepset/haystack:$TAG" sh -c "pip list | grep torch || echo 'not found'")
+          [[ "$TORCH_INSTALLED" == "not found" ]] || echo "::error::PyTorch is installed in deepset/haystack:$TAG image for $PLATFORM"
+
       - name: Build api images
         uses: docker/bake-action@v2
         env:
@@ -82,6 +111,36 @@ jobs:
           targets: ${{ matrix.target }}
           push: true
 
+      - name: Test inference API invocation
+        if: contains(matrix.target, 'inference')
+        env:
+          SERPERDEV_API_KEY: ${{ secrets.SERPERDEV_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          TAG="${{ matrix.target }}-${{ steps.meta.outputs.version }}"
+
+          PLATFORMS=("linux/amd64" "linux/arm64")
+          for PLATFORM in "${PLATFORMS[@]}"; do
+            docker run --name test-container -d \
+              --platform "$PLATFORM" \
+              -e PIPELINE_YAML_PATH=/opt/venv/lib/python3.10/site-packages/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml \
+              -e "RETRIEVER_PARAMS_API_KEY=$SERPERDEV_API_KEY" \
+              -e "PROMPTNODE_PARAMS_API_KEY=$OPENAI_API_KEY" \
+              -p 8080:8000 "deepset/haystack:$TAG"
+
+            I=0
+            until docker logs test-container 2>&1 | grep "Uvicorn running"; do
+              echo "Waiting"
+              sleep 2
+              ((I++)) && ((I==100)) && { echo "::error::Timeout waiting for Uvicorn to start using deepset/haystack:$TAG image for $PLATFORM"; exit 1; }
+            done
+
+            RESULT=$(curl -s -X POST -H "Content-Type: application/json" -d "{\"query\": \"Where in Europe should I live?\"}" http://localhost:8080/query)
+            [[ -n "$RESULT" ]] || echo "::error::No response from inference API using deepset/haystack:$TAG image for $PLATFORM"
+
+            docker rm -f test-container
+          done
+
       - name: Get latest version of Haystack
         id: latest-version
         if: startsWith(github.ref, 'refs/tags/')
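Aside: the two torch checks above can be reproduced locally against a published image. A minimal sketch, assuming a tag of the form the metadata step produces (the value below is illustrative, not a real release tag):

    # Remote-inference images must NOT ship torch; plain cpu/gpu base images must.
    TAG="base-cpu-remote-inference-v1.17.0"   # hypothetical tag
    TORCH_INSTALLED=$(docker run --platform linux/amd64 --rm "deepset/haystack:$TAG" \
      sh -c "pip list | grep torch || echo 'not found'")
    [[ "$TORCH_INSTALLED" == "not found" ]] && echo "OK: torch absent" || echo "torch present"
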
diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl
index 68f81cb6ad..3a0c2738e0 100644
--- a/docker/docker-bake.hcl
+++ b/docker/docker-bake.hcl
@@ -23,19 +23,19 @@ variable "HAYSTACK_EXTRAS" {
 }
 
 group "base" {
-  targets = ["base-cpu", "base-gpu"]
+  targets = ["base-cpu", "base-gpu", "base-cpu-remote-inference"]
 }
 
 group "api" {
-  targets = ["cpu", "gpu"]
+  targets = ["cpu", "gpu", "cpu-remote-inference"]
 }
 
 group "api-latest" {
-  targets = ["cpu-latest", "gpu-latest"]
+  targets = ["cpu-latest", "gpu-latest", "cpu-remote-inference-latest"]
 }
 
 group "all" {
-  targets = ["base", "base-gpu", "cpu", "gpu"]
+  targets = ["base", "base-gpu", "cpu", "gpu", "cpu-remote-inference"]
 }
 
 target "base-cpu" {
@@ -50,6 +50,14 @@ target "base-cpu" {
   platforms = ["linux/amd64", "linux/arm64"]
 }
 
+target "base-cpu-remote-inference" {
+  inherits = ["base-cpu"]
+  tags = ["${IMAGE_NAME}:base-cpu-remote-inference-${IMAGE_TAG_SUFFIX}"]
+  args = {
+    haystack_extras = notequal("",HAYSTACK_EXTRAS) ? "${HAYSTACK_EXTRAS}" : "[preprocessing]"
+  }
+}
+
 target "base-gpu" {
   dockerfile = "Dockerfile.base"
   tags = ["${IMAGE_NAME}:base-gpu-${IMAGE_TAG_SUFFIX}"]
@@ -74,6 +82,21 @@ target "cpu" {
   platforms = ["linux/amd64", "linux/arm64"]
 }
 
+target "cpu-remote-inference" {
+  inherits = ["cpu-remote-inference"]
+  dockerfile = "Dockerfile.api"
+  tags = ["${IMAGE_NAME}:cpu-remote-inference-${IMAGE_TAG_SUFFIX}"]
+  args = {
+    base_image = "${IMAGE_NAME}"
+    base_image_tag = "base-cpu-remote-inference-${BASE_IMAGE_TAG_SUFFIX}"
+  }
+  platforms = ["linux/amd64", "linux/arm64"]
+}
+
+target "cpu-remote-inference-latest" {
+  inherits = ["cpu-remote-inference"]
+  tags = ["${IMAGE_NAME}:cpu-remote-inference"]
+}
+
 target "cpu-latest" {
   inherits = ["cpu"]
   tags = ["${IMAGE_NAME}:cpu"]
diff --git a/haystack/nodes/retriever/web.py b/haystack/nodes/retriever/web.py
index cd92e4bd31..1bd45f8b5d 100644
--- a/haystack/nodes/retriever/web.py
+++ b/haystack/nodes/retriever/web.py
@@ -81,6 +81,7 @@ def __init__(
         )
         self.mode = mode
         self.cache_document_store = cache_document_store
+        self.document_store = cache_document_store
         self.cache_index = cache_index
         self.cache_headers = cache_headers
         self.cache_time = cache_time
diff --git a/rest_api/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml b/rest_api/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml
new file mode 100644
index 0000000000..4126ae1c62
--- /dev/null
+++ b/rest_api/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml
@@ -0,0 +1,42 @@
+version: ignore
+
+components:
+- name: Retriever
+  params:
+    api_key: RETRIEVER_PARAMS_API_KEY
+  type: WebRetriever
+- name: Shaper
+  params:
+    func: join_documents_and_scores
+    inputs:
+      documents: documents
+    outputs:
+    - documents
+  type: Shaper
+- name: custom-at-query-time
+  params:
+    prompt: "\nSynthesize a comprehensive answer from the following most relevant\
+      \ paragraphs and the given question.\nProvide a clear and concise response that\
+      \ summarizes the key points and information presented in the paragraphs.\nYour\
+      \ answer should be in your own words and be no longer than 50 words.\n\n\n Paragraphs:\
+      \ {documents} \n\n Question: {query} \n\n Answer:\n"
+  type: PromptTemplate
+- name: PromptNode
+  params:
+    api_key: PROMPTNODE_PARAMS_API_KEY
+    default_prompt_template: custom-at-query-time
+    max_length: 256
+    model_name_or_path: gpt-3.5-turbo
+  type: PromptNode
+pipelines:
+- name: query
+  nodes:
+  - inputs:
+    - Query
+    name: Retriever
+  - inputs:
+    - Retriever
+    name: Shaper
+  - inputs:
+    - Shaper
+    name: PromptNode
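Aside: a rough sketch of how the new bake targets and the pipeline above fit together locally. Assumptions: the bake variables (IMAGE_TAG_SUFFIX, BASE_IMAGE_TAG_SUFFIX) can be overridden from the environment, IMAGE_NAME defaults to deepset/haystack, and the default docker driver is in use; --set restricts the two-platform targets to one platform so --load can import the result.

    cd docker
    # Build the torch-free base image first; the api image layers Dockerfile.api on top of it.
    IMAGE_TAG_SUFFIX=local BASE_IMAGE_TAG_SUFFIX=local \
      docker buildx bake base-cpu-remote-inference --set "*.platform=linux/amd64" --load
    IMAGE_TAG_SUFFIX=local BASE_IMAGE_TAG_SUFFIX=local \
      docker buildx bake cpu-remote-inference --set "*.platform=linux/amd64" --load

    # Run the REST API with the web LFQA pipeline and query it, mirroring the
    # workflow's smoke test (both API keys must be set in the environment).
    docker run -d -p 8080:8000 \
      -e PIPELINE_YAML_PATH=/opt/venv/lib/python3.10/site-packages/rest_api/pipeline/pipelines_web_lfqa.haystack-pipeline.yaml \
      -e "RETRIEVER_PARAMS_API_KEY=$SERPERDEV_API_KEY" \
      -e "PROMPTNODE_PARAMS_API_KEY=$OPENAI_API_KEY" \
      "deepset/haystack:cpu-remote-inference-local"

    curl -s -X POST -H "Content-Type: application/json" \
      -d '{"query": "Where in Europe should I live?"}' \
      http://localhost:8080/query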