Commit 4c408cf
Fixed issues with Docker compose, locally works now
SYusupov committed Sep 26, 2024
1 parent 62bb15e commit 4c408cf
Showing 12 changed files with 96 additions and 87 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/ci.yaml
@@ -68,12 +68,12 @@ jobs:
docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/logicgpt:${{ github.sha }} .
docker push ${{ secrets.DOCKERHUB_USERNAME }}/logicgpt:${{ github.sha }}
- name: Build and Push Docker image for Ollama
if: ${{ env.TO_BUILD_DOCKER == 'true' }}
run: |
cd ollama # Navigate to the Ollama directory
docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }} .
docker push ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }}
# - name: Build and Push Docker image for Ollama
# if: ${{ env.TO_BUILD_DOCKER == 'true' }}
# run: |
# cd ollama # Navigate to the Ollama directory
# docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }} .
# docker push ${{ secrets.DOCKERHUB_USERNAME }}/ollama:${{ github.sha }}

# Clean up Docker system after building and pushing the image
- name: Clean up Docker
11 changes: 5 additions & 6 deletions Dockerfile
@@ -9,8 +9,6 @@ ENV PYTHONDONTWRITEBYTECODE=1
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

RUN apt-get update && apt-get install -y g++ curl

# Download dependencies as a separate step to take advantage of Docker's caching.
@@ -24,10 +22,11 @@ RUN --mount=type=cache,target=/root/.cache/pip \

RUN rm -rf /root/.cache /var/lib/apt/lists/*

ENV PYTHONPATH=/app

# Expose the port that the application listens on.
EXPOSE 8000
EXPOSE 8501

# Ensure Streamlit is available
RUN python -m streamlit --version

# Run the application.
CMD "streamlit run app_ui.py"
CMD streamlit run /app/app_ui.py
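
The updated Dockerfile exposes ports 8000 and 8501 and starts Streamlit directly. A quick way to sanity-check the image on its own might look like the following sketch (the image tag is a placeholder, and without the Ollama service from compose.yaml the UI will start but generation requests will fail):

```sh
# Hypothetical local smoke test for the app image; "logicgpt-local" is a placeholder tag.
docker build -t logicgpt-local .
docker run --rm -p 8501:8501 logicgpt-local
# The Streamlit UI should then be reachable at http://localhost:8501.
```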
4 changes: 2 additions & 2 deletions app/app_ui.py
@@ -17,8 +17,8 @@
### Reasoned Response:
"""

def generate_response(prompt, model='mistral'):
url = 'http://localhost:11434/api/generate'
def generate_response(prompt, model='finetuned_mistral'):
url = 'http://ollama:11434/api/generate'
headers = {'Content-Type': 'application/json'}
data = {
"model": model,
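generate_response now resolves the Ollama service by its compose service name and defaults to the finetuned_mistral model. The same request can be reproduced by hand, for example from a shell inside the app container; this is only a sketch, and the "stream": false field is an assumption made here to get a single JSON reply rather than streamed chunks:

```sh
# Sketch: the request shape app_ui.py sends to the ollama service over the compose network.
curl -s http://ollama:11434/api/generate \
  -H 'Content-Type: application/json' \
  -d '{"model": "finetuned_mistral", "prompt": "### Question/Task:\nWhat is 2+2?\n### Reasoned Response:", "stream": false}'
```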
13 changes: 7 additions & 6 deletions app/test_api.py
@@ -1,7 +1,8 @@
import requests

# Replace FastAPI's TestClient with actual HTTP calls to the running Ollama server
ollama_url = "http://localhost:11434/api/generate"
ollama_url = "http://localhost:11435/api/generate"
model_name = "finetuned_mistral"

# Long instruction (Babe Ruth example) to be used in testing
long_instruction = '''Babe was a baseball player known for his prowess at the plate and
@@ -37,7 +38,7 @@ def test_inference_with_long_instruction_and_input():
"""Test inference with long instruction and additional input."""
input_text = "Choose A, B, C, or D as your solution."
payload = {
"model": "mistral", # Assuming your Ollama model name is mistral
"model": model_name, # Assuming your Ollama model name is mistral
"prompt": f"### Question/Task:\n{long_instruction}\n### Input:\n{input_text}\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -50,7 +51,7 @@ def test_inference_with_long_instruction_no_input():
def test_inference_with_long_instruction_no_input():
"""Test inference with long instruction but no additional input."""
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n{long_instruction}\n### Input:\n\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -64,7 +65,7 @@ def test_empty_instruction():
"""Test inference with an empty instruction."""
input_text = "Choose A, B, C, or D as your solution."
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n\n### Input:\n{input_text}\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -86,7 +87,7 @@ def test_invalid_request_structure():
def test_empty_model_response():
"""Test when the model returns no tokens (edge case)."""
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n\n### Input:\n\n### Reasoned Response:"
}
response = generate_request(payload)
@@ -103,7 +104,7 @@ def test_valid_inference():
How many four-digit numbers greater than 2999 can be formed such that the
product of the middle two digits exceeds 5?"""
payload = {
"model": "mistral",
"model": model_name,
"prompt": f"### Question/Task:\n{instruction}\n### Input:\n\n### Reasoned Response:"
}
response = generate_request(payload)
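Because the tests target localhost:11435, the host port that compose publishes for the Ollama container, they can also be run straight from the host once the stack is up. A possible invocation, assuming pytest and requests are installed locally, is:

```sh
# Start only the Ollama service, then run the API tests from the host.
docker compose up -d ollama
pytest app/test_api.py -v
```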
51 changes: 25 additions & 26 deletions compose.yaml
@@ -1,31 +1,30 @@
version: '3.8' # Ensure you specify the version

services:
server:
image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG} # Use the image from Docker Hub
ollama: # New service for running the Dockerfile in /ollama
image: ollama/ollama:latest
pull_policy: always
container_name: ollama
ports: ["11435:11434"] # Expose Ollama on port 11435 externally, map it to 11434 inside the container
expose:
- 11435
volumes:
- ./model_files:/model_files # Mount the directory with the trained model
tty: true
entrypoint: ["/bin/sh", "/model_files/run_ollama.sh"] # Loading the finetuned Mistral with the GGUF file
# restart: unless-stopped

app:
# to build the Dockerfile locally, uncomment lines #17-18 and comment out #19
build:
context: . # Path to the Dockerfile
# image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG} # Use the image from Docker Hub
container_name: logic_app
ports:
- 8501:8501
expose:
- 8501
volumes:
- ./app:/app
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8501 || exit 1"]
interval: 30s
timeout: 10s
retries: 3
restart: always

ollama: # New service for running the Dockerfile in /ollama
# build:
# context: ./ollama # Path to the Dockerfile
# dockerfile: Dockerfile # Name of the Dockerfile (optional, defaults to Dockerfile)
image: ${DOCKERHUB_USERNAME}/ollama:${OLLAMA_IMAGE_TAG} # Use the image from Docker Hub
ports:
- 11435:11435 # Expose Ollama API port
volumes:
- ./model_files:/model_files # Mount model files if necessary
restart: always
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:11435/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
- ./model_files:/model_files
depends_on: # very important! otherwise ollama doesn't run
- ollama
# restart: unless-stopped
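
With the rewritten compose.yaml, a typical local run would be roughly the following; these are standard Docker Compose commands rather than anything defined in the repository:

```sh
# Build the app image and start both services defined in compose.yaml.
docker compose up --build -d
# Watch run_ollama.sh create and load the finetuned_mistral model.
docker compose logs -f ollama
# Shut everything down when finished.
docker compose down
```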
36 changes: 25 additions & 11 deletions compose_test.yaml
@@ -1,16 +1,30 @@
services:
server:
image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG}
ollama: # New service for running the Dockerfile in /ollama
image: ollama/ollama:latest
pull_policy: always
container_name: ollama
ports: ["11435:11434"] # Expose Ollama on port 11435 externally, map it to 11434 inside the container
expose:
- 11435
volumes:
- ./model_files:/model_files # Mount the directory with the trained model
tty: true
entrypoint: ["/bin/sh", "/model_files/run_ollama_test.sh"] # Loading the finetuned Mistral with the GGUF file
# restart: unless-stopped

app:
# to build the Dockerfile locally, uncomment lines #17-18 and comment out #19
build:
context: . # Path to the Dockerfile
# image: ${DOCKERHUB_USERNAME}/logicgpt:${DOCKER_IMAGE_TAG} # Use the image from Docker Hub
container_name: logic_app
ports:
- 8000:8000
- 8501:8501
expose:
- 8501
volumes:
- ./app:/app
- ./model_files:/model_files

healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8000 || exit 1"]
interval: 30s
timeout: 10s
retries: 3
entrypoint: ["/bin/sh", "-c", "pytest /app/test_api.py"]
restart: "no"
depends_on: # very important! otherwise ollama doesn't run
- ollama
# restart: unless-stopped
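
compose_test.yaml swaps the app entrypoint for pytest, so the whole suite can be driven as a one-shot compose run. One way to do that, with flags that are standard Compose options rather than part of this commit, is:

```sh
# Run the test stack once: the app container executes pytest and exits;
# --abort-on-container-exit then stops ollama, and --exit-code-from propagates the pytest result.
docker compose -f compose_test.yaml up --build --abort-on-container-exit --exit-code-from app
```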
1 change: 1 addition & 0 deletions model_files/Modelfile
@@ -0,0 +1 @@
FROM /model_files/unsloth.Q4_K_M.gguf
9 changes: 9 additions & 0 deletions model_files/run_ollama.sh
@@ -0,0 +1,9 @@
#!/bin/bash

echo "Starting Ollama server..."
ollama serve & # Start Ollama in the background

echo "Ollama is ready, creating the model..."

ollama create finetuned_mistral -f /model_files/Modelfile
ollama run finetuned_mistral
16 changes: 16 additions & 0 deletions model_files/run_ollama_test.sh
@@ -0,0 +1,16 @@
#!/bin/bash

echo "Starting Ollama server..."
ollama serve & # Start Ollama in the background
OLLAMA_PID=$! # Store the process ID of the Ollama server

echo "Ollama is ready, creating the model..."

ollama create finetuned_mistral -f /model_files/Modelfile
ollama run finetuned_mistral

sleep 10
# run the pytests
/bin/sh -c "pytest /app/test_api.py"

# kill $OLLAMA_PID
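
Both scripts start ollama serve in the background and then create the model, with the test variant relying on a fixed sleep 10. A small polling loop would make the readiness wait explicit; this is a sketch of an alternative, not part of the commit:

```sh
# Sketch: poll the server until it responds before creating the model,
# instead of assuming it is ready or sleeping a fixed 10 seconds.
ollama serve &
until ollama list > /dev/null 2>&1; do
  echo "Waiting for Ollama server..."
  sleep 1
done
ollama create finetuned_mistral -f /model_files/Modelfile
```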
13 changes: 0 additions & 13 deletions ollama/Dockerfile

This file was deleted.

1 change: 0 additions & 1 deletion ollama/Modelfile

This file was deleted.

16 changes: 0 additions & 16 deletions ollama/entrypoint.sh

This file was deleted.
