diff --git a/docs/development.md b/docs/development.md
index b33c8d1..b4a6b34 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -69,7 +69,54 @@ npm run pretty
 npm run type-check
 ```
 
-### Summary of Server-Side Rendering and Client-Side Data Handling for Jobs and Chat Routes
+## Local Dev Chat Environment
+
+### 1) Using the ilab command line tool
+
+For the chat functionality to work, you need an ilab model chat instance. To run one locally:
+
+`cd server`
+
+Then follow the InstructLab getting-started guide: [https://github.com/instructlab/instructlab?tab=readme-ov-file#-getting-started](https://github.com/instructlab/instructlab?tab=readme-ov-file#-getting-started)
+
+After you run `ilab serve`, you should have a chat server instance running, by default, on port 8000.
+
+### 2) Using Podman
+
+#### Current issues
+
+- The container image that runs the server does not utilise the Mac Metal GPU, so it is very slow when answering prompts.
+- The container image is very large because it bundles the model itself. The model could instead be mounted via a volume to reduce the size of the image.
+
+`podman run -p 8000:8000 aevo987654/instructlab_chat_8000:v2`
+
+This should start a server on port 8000.
+
+### Configuring the chat environment to use a local ilab model chat instance
+
+Return to the root of the repo (`ui`), run `npm run dev`, and visit [http://localhost:3000/playground/endpoints](http://localhost:3000/playground/endpoints).
+
+Click the `Add Endpoint` button and a popup modal will appear.
+
+![Add endpoint modal](../public/dev-local-chat-server/add-endpoint.png)
+
+- URL: add `http://127.0.0.1:8000`
+- Model Name: add `merlinite-7b-lab-Q4_K_M.gguf`
+- API Key: add some random characters
+
+Click the `Save` button.
+
+![Endpoint added](../public/dev-local-chat-server/added-endpoint.png)
+
+Go to the chat interface at [http://localhost:3000/playground/chat](http://localhost:3000/playground/chat) and select the `merlinite-7b-lab-Q4_K_M.gguf` model.
+
+![Select the correct model](../public/dev-local-chat-server/select-the-correct-model.png)
+
+The chat interface should now use the local server.
+
+![Successful chat](../public/dev-local-chat-server/successful-chat.png)
+
+## Summary of Server-Side Rendering and Client-Side Data Handling for Jobs and Chat Routes
 
 We are leveraging Next.js's app router to handle
 [server-side rendering](https://nextjs.org/docs/pages/building-your-application/rendering/server-side-rendering)
@@ -77,7 +124,7 @@ We are leveraging Next.js's app router to handle
 Below is a summary of how we manage server-side rendering and client-side data handling for these routes.
 
-#### Server-Side Rendering (SSR)
+### Server-Side Rendering (SSR)
 
 **API Routes**:
diff --git a/public/dev-local-chat-server/add-endpoint.png b/public/dev-local-chat-server/add-endpoint.png
new file mode 100644
index 0000000..c415600
Binary files /dev/null and b/public/dev-local-chat-server/add-endpoint.png differ
diff --git a/public/dev-local-chat-server/added-endpoint.png b/public/dev-local-chat-server/added-endpoint.png
new file mode 100644
index 0000000..9d81e01
Binary files /dev/null and b/public/dev-local-chat-server/added-endpoint.png differ
diff --git a/public/dev-local-chat-server/select-the-correct-model.png b/public/dev-local-chat-server/select-the-correct-model.png
new file mode 100644
index 0000000..5bda0e7
Binary files /dev/null and b/public/dev-local-chat-server/select-the-correct-model.png differ
diff --git a/public/dev-local-chat-server/successful-chat.png b/public/dev-local-chat-server/successful-chat.png
new file mode 100644
index 0000000..9b49c34
Binary files /dev/null and b/public/dev-local-chat-server/successful-chat.png differ
diff --git a/server/Containerfile b/server/Containerfile
new file mode 100644
index 0000000..18da2bc
--- /dev/null
+++ b/server/Containerfile
@@ -0,0 +1,21 @@
+FROM python:3.11
+
+# Set the working directory
+WORKDIR /app
+
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir instructlab==0.16.1
+
+# Copy the ilab configuration into the working directory
+COPY config.yaml .
+
+# Download the merlinite model
+RUN ilab download
+
+# Copy the remaining project files into the working directory
+COPY . .
+
+EXPOSE 8000
+
+# Run the chat server with the specified model family and model file
+CMD ["ilab", "serve", "--model-family", "merlinite", "--model-path", "models/merlinite-7b-lab-Q4_K_M.gguf"]
\ No newline at end of file
diff --git a/server/config.yaml b/server/config.yaml
new file mode 100644
index 0000000..ee4f316
--- /dev/null
+++ b/server/config.yaml
@@ -0,0 +1,26 @@
+chat:
+  context: default
+  greedy_mode: false
+  logs_dir: data/chatlogs
+  max_tokens: null
+  model: models/merlinite-7b-lab-Q4_K_M.gguf
+  session: null
+  vi_mode: false
+  visible_overflow: true
+general:
+  log_level: INFO
+generate:
+  chunk_word_count: 1000
+  model: models/merlinite-7b-lab-Q4_K_M.gguf
+  num_cpus: 10
+  num_instructions: 100
+  output_dir: generated
+  prompt_file: prompt.txt
+  seed_file: seed_tasks.json
+  taxonomy_base: origin/main
+  taxonomy_path: taxonomy
+serve:
+  gpu_layers: -1
+  host_port: 0.0.0.0:8000
+  max_ctx_size: 4096
+  model_path: models/merlinite-7b-lab-Q4_K_M.gguf
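As a usage sketch for the new `server/Containerfile` above: Podman automatically picks up a file named `Containerfile` in the build context, so no `-f` flag is needed. The image tag `instructlab_chat` below is an arbitrary local name, not something defined in this change.

```shell
# Build the chat-server image from server/Containerfile (run from the repo root).
# "instructlab_chat" is an arbitrary local tag.
podman build -t instructlab_chat server/

# Run it, publishing the port that ilab serve listens on per server/config.yaml.
podman run -p 8000:8000 instructlab_chat
```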
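To sanity-check the chat server before adding it as an endpoint in the UI, you can query it directly; `ilab serve` exposes an OpenAI-compatible API on the configured `host_port`. A minimal sketch, assuming the standard OpenAI-style paths (these may differ across instructlab versions):

```shell
# List the models the local server is serving (OpenAI-compatible API assumed).
curl http://127.0.0.1:8000/v1/models

# Send a single chat message to the model configured in server/config.yaml.
curl http://127.0.0.1:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "merlinite-7b-lab-Q4_K_M.gguf", "messages": [{"role": "user", "content": "Hello"}]}'
```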