Commit c91eb71: Add benchmark and docs
nstogner committed Dec 16, 2024 (1 parent: 83433cc)
Showing 20 changed files with 25,181 additions and 22,592 deletions.
1 change: 1 addition & 0 deletions benchmarks/chat/.dockerignore
@@ -0,0 +1 @@
+data/ShareGPT_V3_unfiltered_cleaned_split.json
2 changes: 1 addition & 1 deletion benchmarks/chat/.gitignore
@@ -1 +1 @@
-ShareGPT_V3_unfiltered_cleaned_split.json
+data/ShareGPT_V3_unfiltered_cleaned_split.json
14 changes: 14 additions & 0 deletions benchmarks/chat/Dockerfile
@@ -0,0 +1,14 @@
+FROM ubuntu:20.04
+
+RUN apt-get update && apt-get install -y build-essential make python3 wget vim
+
+# Install k6 binary.
+ENV K6_VERSION=v0.55.0
+RUN wget https://github.com/grafana/k6/releases/download/${K6_VERSION}/k6-${K6_VERSION}-linux-amd64.tar.gz && tar -zxvf k6-${K6_VERSION}-linux-amd64.tar.gz && mv k6-${K6_VERSION}-linux-amd64/k6 /usr/local/bin && rm k6-${K6_VERSION}-linux-amd64.tar.gz
+
+WORKDIR /work
+
+COPY ./k6.js .
+COPY ./Makefile .
+COPY ./data ./data
+COPY ./scenarios ./scenarios
16 changes: 12 additions & 4 deletions benchmarks/chat/Makefile
@@ -1,5 +1,13 @@
-ShareGPT_V3_unfiltered_cleaned_split.json:
-	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+data/ShareGPT_V3_unfiltered_cleaned_split.json:
+	cd data && wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
 
-prepare-message-threads: ShareGPT_V3_unfiltered_cleaned_split.json
-	python prepare-message-threads.py
+.PHONY: data
+data: data/ShareGPT_V3_unfiltered_cleaned_split.json
+	cd data && python prepare-message-threads.py
+
+configure:
+	kubectl create cm chat-benchmark --from-file=Makefile --from-file=k6.js --from-file=prepare-message-threads.py
+
+run:
+	ls scenarios/${SCENARIO}
+	CONFIG_DIR=scenarios/${SCENARIO} DATA_DIR=data MODEL_ADDR=kubeai/openai k6 run ./k6.js
24,884 changes: 24,884 additions & 0 deletions benchmarks/chat/data/message-threads.json

Large diffs are not rendered by default.

benchmarks/chat/prepare-message-threads.py
@@ -21,8 +21,7 @@ def main():
         userMessages.append(content)
         totalContentLength += len(content)
 
-        # Avoid adding conversations that are too long (will exceed context window).
-        if totalContentLength > 500:
+        if totalContentLength < 2500:
             continue
 
         if len(userMessages) < 5:
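The change above inverts the length filter: rather than skipping threads whose user content exceeds 500 characters, the script now skips threads shorter than 2,500 characters, keeping only long conversations (and, as before, only those with at least five user messages). A minimal sketch of the resulting selection rule, with the thread structure simplified from the ShareGPT format:

```python
def keep_thread(user_messages):
    """Return True if a thread passes the post-commit filters:
    >= 2500 characters of user content and >= 5 user messages."""
    total_content_length = sum(len(m) for m in user_messages)
    if total_content_length < 2500:
        return False
    if len(user_messages) < 5:
        return False
    return True

short = ["hi"] * 5               # 10 characters total
long_enough = ["x" * 600] * 5    # 3000 characters across 5 messages
print(keep_thread(short))        # False: too little content
print(keep_thread(long_enough))  # True: long enough, enough messages
```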
32 changes: 8 additions & 24 deletions benchmarks/chat/k6.js
@@ -4,40 +4,24 @@ import http from 'k6/http';
 import { Trend, Counter } from 'k6/metrics';
 
 const model_addr = __ENV.MODEL_ADDR;
-const model_id = __ENV.MODEL_ID;
+const config_dir = __ENV.CONFIG_DIR;
+const data_dir = __ENV.DATA_DIR;
 
 const timePerToken = new Trend('time_per_token', true);
 const tokens = new Counter('tokens');
 const new_tokens = new Counter('new_tokens');
 const input_tokens = new Counter('input_tokens');
-const max_new_tokens = 50;
 
-const messageThreads = JSON.parse(open("message-threads.json"))
+const k6Options = JSON.parse(open(`${config_dir}/k6.json`));
+const baseRequest = JSON.parse(open(`${config_dir}/base-request.json`));
+const messageThreads = JSON.parse(open(`${data_dir}/message-threads.json`))
 
-export const options = {
-  thresholds: {
-    http_req_failed: ['rate==0'],
-  },
-  scenarios: {
-    chat: {
-      executor: 'shared-iterations',
-      // Number of VUs to run concurrently.
-      vus: 20,
-      // Total number of script iterations to execute across all VUs (b/c using 'shared-iterations' executor).
-      iterations: 200,
-      maxDuration: '120s',
-    },
-  },
-};
+export const options = k6Options;
 
 export default function run() {
   const headers = { 'Content-Type': 'application/json' };
   const msgThread = messageThreads[scenario.iterationInTest % messageThreads.length];
-  var payload = {
-    "messages": [],
-    "temperature": 0,
-    "model": `${model_id}`,
-    "max_tokens": max_new_tokens
-  };
+  var payload = JSON.parse(JSON.stringify(baseRequest));
 
   // console.log(`Message thread: ${JSON.stringify(msgThread)}`);
 
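After this change, k6.js no longer hard-codes its run options and request template; it expects them in two per-scenario files, `CONFIG_DIR/k6.json` and `CONFIG_DIR/base-request.json`. A hedged sketch of generating one such scenario directory, with field values copied from the inline defaults this commit removed (the scenario name "example" and the model id are hypothetical):

```python
import json
import os
import tempfile

def write_scenario(root, name, model_id, max_tokens=50):
    """Create scenarios/<name>/ with the two config files k6.js reads."""
    d = os.path.join(root, "scenarios", name)
    os.makedirs(d, exist_ok=True)
    # k6 run options, matching the deleted inline `options` object.
    k6_options = {
        "thresholds": {"http_req_failed": ["rate==0"]},
        "scenarios": {
            "chat": {
                "executor": "shared-iterations",
                "vus": 20,
                "iterations": 200,
                "maxDuration": "120s",
            }
        },
    }
    # Request template, matching the deleted inline `payload` object.
    base_request = {
        "messages": [],
        "temperature": 0,
        "model": model_id,
        "max_tokens": max_tokens,
    }
    with open(os.path.join(d, "k6.json"), "w") as f:
        json.dump(k6_options, f, indent=2)
    with open(os.path.join(d, "base-request.json"), "w") as f:
        json.dump(base_request, f, indent=2)
    return d

root = tempfile.mkdtemp()
d = write_scenario(root, "example", "llama-3.1-8b-instruct")
print(sorted(os.listdir(d)))  # ['base-request.json', 'k6.json']
```

With this layout, `SCENARIO=example make run` would point `CONFIG_DIR` at the generated directory.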
21 changes: 11 additions & 10 deletions benchmarks/chat/k8s/pod.yaml
@@ -5,14 +5,15 @@ metadata:
 spec:
   restartPolicy: Never
   containers:
-  - name: k6
-    image: grafana/k6
+  - name: bench
+    image: us-central1-docker.pkg.dev/substratus-dev/default/kubeai-benchmark-chat:v0.0.2
     command: ["sleep", "infinity"]
-    #args: ["run", "/config/k6.js"] #, "--http-debug"]
-    volumeMounts:
-    - name: work
-      mountPath: /work
-  volumes:
-  - name: work
-    configMap:
-      name: chat-benchmark
+    resources:
+      requests:
+        cpu: 6
+        ephemeral-storage: 10Gi
+        memory: 24Gi
+      limits:
+        cpu: 6
+        ephemeral-storage: 10Gi
+        memory: 24Gi