Merge pull request #102 from acon96/release/v0.2.11
Release v0.2.11
acon96 authored Apr 7, 2024
2 parents d9d42b5 + 5afc501 commit da05917
Showing 14 changed files with 612 additions and 165 deletions.
88 changes: 88 additions & 0 deletions .github/workflows/create-release.yml
@@ -0,0 +1,88 @@
name: Create Release

on:
  workflow_dispatch:
    inputs:
      release_notes:
        description: "Release Notes"
        required: true
        type: string

permissions:
  contents: write

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.arch }} (HA ${{ matrix.home_assistant_version }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        home_assistant_version: ["2023.12.4", "2024.2.1"]
        arch: ["aarch64", "armhf", "amd64", "i386"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Read llama-cpp-python version
        run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
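        # The grep/tr pipeline turns a const.py line such as
        #   EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.56"   (hypothetical value)
        # into the KEY=value pair EMBEDDED_LLAMA_CPP_PYTHON_VERSION=0.2.56,
        # which $GITHUB_ENV exposes to later steps as env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION.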

      - name: Build artifact
        uses: uraimo/run-on-arch-action@v2
        id: build
        with:
          arch: none
          distro: none
          base_image: homeassistant/${{ matrix.arch }}-homeassistant:${{ matrix.home_assistant_version }}
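          # Setting arch/distro to "none" tells run-on-arch-action to skip its
          # bundled QEMU images and run inside the custom base_image above.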

          # Create an artifacts directory
          setup: |
            mkdir -p "${PWD}/artifacts"
          # Mount the artifacts directory as /artifacts in the container
          dockerRunArgs: |
            --volume "${PWD}/artifacts:/artifacts"
          # The shell to run commands with in the container
          shell: /bin/bash

          # Produce a binary artifact and place it in the mounted volume
          run: |
            apk update
            apk add build-base python3-dev cmake
            pip3 install build
            cd /tmp
            git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
            cd llama-cpp-python
            export CMAKE_ARGS="-DLLAVA_BUILD=OFF"
            python3 -m build --wheel
            cp -f ./dist/*.whl /artifacts/

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: ./artifacts/*.whl
          name: artifact_${{ matrix.arch }}_${{ matrix.home_assistant_version }}

  release:
    name: Create Release
    needs: [ build_wheels ]
    runs-on: ubuntu-latest
    if: "startsWith(github.event.ref, 'refs/tags/v')" # only create a release if this was run on a tag

    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      - name: Create GitHub release
        uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          body: ${{ inputs.release_notes }}
          make_latest: true
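Because the workflow is triggered by `workflow_dispatch` with a required `release_notes` input, it must be started manually, and the `if:` guard on the release job means a GitHub release is published only when the run was dispatched from a `v*` tag. A minimal sketch of triggering it with the GitHub CLI, assuming an authenticated `gh` and an existing `v0.2.11` tag:

```bash
# Dispatch the workflow from the v0.2.11 tag so that github.event.ref is
# refs/tags/v0.2.11 and the release job's if: condition passes.
gh workflow run create-release.yml \
  --ref v0.2.11 \
  -f release_notes="Release v0.2.11"

# Dispatching from a branch instead still builds the full wheel matrix,
# but the release job is skipped by the startsWith() guard.
```

Dispatching from a branch therefore doubles as a dry run for the wheel builds without publishing anything.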
1 change: 1 addition & 0 deletions README.md
@@ -129,6 +129,7 @@ In order to facilitate running the project entirely on the system where Home Ass
## Version History
| Version | Description |
| ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| v0.2.11 | Add prompt caching, expose llama.cpp runtime settings, build llama-cpp-python wheels using GitHub actions, and install wheels directly from GitHub |
| v0.2.10 | Allow configuring the model parameters during initial setup, attempt to auto-detect defaults for recommended models, Fix to allow lights to be set to max brightness |
| v0.2.9 | Fix HuggingFace Download, Fix llama.cpp wheel installation, Fix light color changing, Add in-context-learning support |
| v0.2.8 | Fix ollama model names with colons |
10 changes: 3 additions & 7 deletions TODO.md
@@ -1,15 +1,12 @@
# TODO
- [ ] setup github actions to build wheels that are optimized for RPIs??
- [ ] setup github actions to publish docker images for text-gen-webui addon
- [ ] detection/mitigation of too many entities being exposed & blowing out the context length
- [ ] areas/room support
- [ ] figure out DPO for refusals + fixing incorrect entity id
- [ ] figure out DPO to improve response quality
- [x] setup github actions to build wheels that are optimized for RPIs
- [x] mixtral + prompting (no fine tuning)
- add in context learning variables to sys prompt template
- add new options to setup process for setting prompt style + picking fine-tuned/ICL
- [ ] prime kv cache with current "state" so that requests are faster
- [ ] support fine-tuning with RoPE for longer contexts
- [ ] support config via yaml instead of configflow
- [x] prime kv cache with current "state" so that requests are faster
- [x] ChatML format (actually need to add special tokens)
- [x] Vicuna dataset merge (yahma/alpaca-cleaned)
- [x] Phi-2 fine tuning
@@ -19,7 +16,6 @@
- [x] Licenses + Attributions
- [x] Finish Readme/docs for initial release
- [x] Function calling as JSON
- [ ] multi-turn prompts; better instruct dataset like dolphin/wizardlm?
- [x] Fine tune Phi-1.5 version
- [x] make llama-cpp-python wheels for "llama-cpp-python>=0.2.24"
- [x] make a proper evaluation framework to run. not just loss. should test accuracy on the function calling
4 changes: 2 additions & 2 deletions addon/README.md
@@ -1,4 +1,4 @@
# text-generation-webui - Home Assistant Addon
NOTE: This is super experimental and may or may not work on a Raspberry Pi
Installs text-generation-webui into a docker container using CPU only mode (llama.cpp)

Installs text-generation-webui into a docker container using CPU only mode (llama.cpp)
NOTE: This addon is not the preferred way to run LLama.cpp as part of Home Assistant and will not be updated.