Commit 4c408cf
Fixed issues with Docker compose, locally works now
SYusupov committed Sep 26, 2024
1 parent 62bb15e commit 4c408cf
Showing 12 changed files with 96 additions and 87 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/ci.yaml
@@ -68,12 +68,12 @@ jobs:
docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/logicgpt:${{ github.sha }} .
docker push ${{ secrets.DOCKERHUB_USERNAME }}/logicgpt:${{ github.sha }}
- name: Build and Push Docker image for Ollama
if: ${{ env.TO_BUILD_DOCKER == 'true' }}
run: |
cd ollama # Navigate to the Ollama directory
docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }} .
docker push ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }}
# - name: Build and Push Docker image for Ollama
# if: ${{ env.TO_BUILD_DOCKER == 'true' }}
# run: |
# cd ollama # Navigate to the Ollama directory
# docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }} .
# docker push ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }}

# Clean up Docker system after building and pushing the image
- name: Clean up Docker
11 changes: 5 additions & 6 deletions Dockerfile
@@ -9,8 +9,6 @@ ENV PYTHONDONTWRITEBYTECODE=1
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

RUN apt-get update && apt-get install -y g++ curl

# Download dependencies as a separate step to take advantage of Docker's caching.
@@ -24,10 +22,11 @@ RUN --mount=type=cache,target=/root/.cache/pip \

RUN rm -rf /root/.cache /var/lib/apt/lists/*

ENV PYTHONPATH=/app

# Expose the port that the application listens on.
EXPOSE 8000
EXPOSE 8501

# Ensure Streamlit is available
RUN python -m streamlit --version

# Run the application.
CMD "streamlit run app_ui.py"
CMD streamlit run /app/app_ui.py
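
The updated Dockerfile exposes ports 8000 and 8501 and starts Streamlit directly. A quick way to sanity-check the image on its own might look like the following sketch (the image tag is a placeholder, and without the Ollama service from compose.yaml the UI will start but generation requests will fail):

```sh
# Hypothetical local smoke test for the app image; "logicgpt-local" is a placeholder tag.
docker build -t logicgpt-local .
docker run --rm -p 8501:8501 logicgpt-local
# The Streamlit UI should then be reachable at http://localhost:8501.
```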
4 changes: 2 additions & 2 deletions app/app_ui.py
@@ -17,8 +17,8 @@
### Reasoned Response:
"""

def generate_response(prompt, model='mistral'):
url = 'http://localhost:11434/api/generate'
def generate_response(prompt, model='finetuned_mistral'):
url = 'http://ollama:11434/api/generate'
headers = {'Content-Type': 'application/json'}
data = {
"model": model,
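generate_response now resolves the Ollama service by its compose service name and defaults to the finetuned_mistral model. The same request can be reproduced by hand, for example from a shell inside the app container; this is only a sketch, and the "stream": false field is an assumption made here to get a single JSON reply rather than streamed chunks:

```sh
# Sketch: the request shape app_ui.py sends to the ollama service over the compose network.
curl -s http://ollama:11434/api/generate \
  -H 'Content-Type: application/json' \
  -d '{"model": "finetuned_mistral", "prompt": "### Question/Task:\nWhat is 2+2?\n### Reasoned Response:", "stream": false}'
```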
13 changes: 7 additions & 6 deletions app/test_api.py
@@ -1,7 +1,8 @@
import requests

# Replace FastAPI's TestClient with actual HTTP calls to the running Ollama server
ollama_url = "http://localhost:11434/api/generate"
ollama_url = "http://localhost:11435/api/generate"
model_name = "finetuned_mistral"

# Long instruction (Babe Ruth example) to be used in testing
long_instruction = '''Babe was a baseball player known for his prowess at the plate and
@@ -37,7 +38,7 @@ def test_inference_with_long_instruction_and_input():
"""Test inference with long instruction and additional input."""
input_text = "Choose A, B, C, or D as your solution."
payload = {
"model": "mistral", # Assuming your Ollama model name is mistral
"model": model_name, # Assuming your Ollama model name is mistral
"prompt": f"### Question/Task:\n{long_instruction}\n### Input:\n{input_text}\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -50,7 +51,7 @@ def test_inference_with_long_instruction_no_input():
def test_inference_with_long_instruction_no_input():
"""Test inference with long instruction but no additional input."""
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n{long_instruction}\n### Input:\n\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -64,7 +65,7 @@ def test_empty_instruction():
"""Test inference with an empty instruction."""
input_text = "Choose A, B, C, or D as your solution."
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n\n### Input:\n{input_text}\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -86,7 +87,7 @@ def test_invalid_request_structure():
def test_empty_model_response():
"""Test when the model returns no tokens (edge case)."""
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n\n### Input:\n\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -103,7 +104,7 @@ def test_valid_inference():
How many four-digit numbers greater than 2999 can be formed such that the
product of the middle two digits exceeds 5?"""
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n{instruction}\n### Input:\n\n### Reasoned Response:"
}
response = generate_request(payload)
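Because the tests target localhost:11435, the host port that compose publishes for the Ollama container, they can also be run straight from the host once the stack is up. A possible invocation, assuming pytest and requests are installed locally, is:

```sh
# Start only the Ollama service, then run the API tests from the host.
docker compose up -d ollama
pytest app/test_api.py -v
```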
51 changes: 25 additions & 26 deletions compose.yaml
@@ -1,31 +1,30 @@
version: '3.8' # Ensure you specify the version

services:
server:
image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG} # Use the image from Docker Hub
ollama: # New service for running the Dockerfile in /ollama
image: ollama/ollama:latest
pull_policy: always
container_name: ollama
ports: ["11435:11434"] # Expose Ollama on port 11435 externally, map it to 11434 inside the container
expose:
- 11435
volumes:
- ./model_files:/model_files # Mount the directory with the trained model
tty: true
entrypoint: ["/bin/sh", "/model_files/run_ollama.sh"] # Loading the finetuned Mistral with the GGUF file
# restart: unless-stopped

app:
# to build the Dockerfile locally, uncomment lines #17-18 and comment out #19
build:
context: . # Path to the Dockerfile
# image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG} # Use the image from Docker Hub
container_name: logic_app
ports:
- 8501:8501
expose:
- 8501
volumes:
- ./app:/app
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8501 || exit 1"]
interval: 30s
timeout: 10s
retries: 3
restart: always

ollama: # New service for running the Dockerfile in /ollama
# build:
# context: ./ollama # Path to the Dockerfile
# dockerfile: Dockerfile # Name of the Dockerfile (optional, defaults to Dockerfile)
image: ${DOCKERHUB_USERNAME}/ollama:${OLLAMA_IMAGE_TAG} # Use the image from Docker Hub
ports:
- 11435:11435 # Expose Ollama API port
volumes:
- ./model_files:/model_files # Mount model files if necessary
restart: always
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:11435/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
- ./model_files:/model_files
depends_on: # very important! otherwise ollama doesn't run
- ollama
# restart: unless-stopped
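
With the rewritten compose.yaml, a typical local run would be roughly the following; these are standard Docker Compose commands rather than anything defined in the repository:

```sh
# Build the app image and start both services defined in compose.yaml.
docker compose up --build -d
# Watch run_ollama.sh create and load the finetuned_mistral model.
docker compose logs -f ollama
# Shut everything down when finished.
docker compose down
```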
36 changes: 25 additions & 11 deletions compose_test.yaml
@@ -1,16 +1,30 @@
services:
server:
image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG}
ollama: # New service for running the Dockerfile in /ollama
image: ollama/ollama:latest
pull_policy: always
container_name: ollama
ports: ["11435:11434"] # Expose Ollama on port 11435 externally, map it to 11434 inside the container
expose:
- 11435
volumes:
- ./model_files:/model_files # Mount the directory with the trained model
tty: true
entrypoint: ["/bin/sh", "/model_files/run_ollama_test.sh"] # Loading the finetuned Mistral with the GGUF file
# restart: unless-stopped

app:
# to build the Dockerfile locally, uncomment lines #17-18 and comment out #19
build:
context: . # Path to the Dockerfile
# image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG} # Use the image from Docker Hub
container_name: logic_app
ports:
- 8000:8000
- 8501:8501
expose:
- 8501
volumes:
- ./app:/app
- ./model_files:/model_files

healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8000 || exit 1"]
interval: 30s
timeout: 10s
retries: 3
entrypoint: ["/bin/sh", "-c", "pytest /app/test_api.py"]
restart: "no"
depends_on: # very important! otherwise ollama doesn't run
- ollama
# restart: unless-stopped
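
compose_test.yaml swaps the app entrypoint for pytest, so the whole suite can be driven as a one-shot compose run. One way to do that, with flags that are standard Compose options rather than part of this commit, is:

```sh
# Run the test stack once: the app container executes pytest and exits;
# --abort-on-container-exit then stops ollama, and --exit-code-from propagates the pytest result.
docker compose -f compose_test.yaml up --build --abort-on-container-exit --exit-code-from app
```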
1 change: 1 addition & 0 deletions model_files/Modelfile
@@ -0,0 +1 @@
FROM /model_files/unsloth.Q4_K_M.gguf
9 changes: 9 additions & 0 deletions model_files/run_ollama.sh
@@ -0,0 +1,9 @@
#!/bin/bash

echo "Starting Ollama server..."
ollama serve & # Start Ollama in the background

echo "Ollama is ready, creating the model..."

ollama create finetuned_mistral -f /model_files/Modelfile
ollama run finetuned_mistral
16 changes: 16 additions & 0 deletions model_files/run_ollama_test.sh
@@ -0,0 +1,16 @@
#!/bin/bash

echo "Starting Ollama server..."
ollama serve & # Start Ollama in the background
OLLAMA_PID=$! # Store the process ID of the Ollama server

echo "Ollama is ready, creating the model..."

ollama create finetuned_mistral -f /model_files/Modelfile
ollama run finetuned_mistral

sleep 10
# run the pytests
/bin/sh -c "pytest /app/test_api.py"

# kill $OLLAMA_PID
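
Both scripts start ollama serve in the background and then create the model, with the test variant relying on a fixed sleep 10. A small polling loop would make the readiness wait explicit; this is a sketch of an alternative, not part of the commit:

```sh
# Sketch: poll the server until it responds before creating the model,
# instead of assuming it is ready or sleeping a fixed 10 seconds.
ollama serve &
until ollama list > /dev/null 2>&1; do
  echo "Waiting for Ollama server..."
  sleep 1
done
ollama create finetuned_mistral -f /model_files/Modelfile
```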
13 changes: 0 additions & 13 deletions ollama/Dockerfile

This file was deleted.

1 change: 0 additions & 1 deletion ollama/Modelfile

This file was deleted.

16 changes: 0 additions & 16 deletions ollama/entrypoint.sh

This file was deleted.
