From 7f341a8b59ddfc0d8e77ad7f9f9a4d5e1ab82565 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga
Date: Sat, 11 Nov 2023 10:10:42 -0800
Subject: [PATCH] add python openai multithread system test

---
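This replaces the curl-based load loop with a small Python script that
drives Lingo through the official OpenAI client, so the test exercises
the same request path a real client would. A sketch of running the load
generator on its own, assuming the proxy is already port-forwarded to
localhost:8080 and the pinned openai==1.2.3 package is installed:

    python3 tests/test_openai_embedding.py \
        --base-url http://localhost:8080/v1 \
        --requests 60 \
        --model text-embedding-ada-002
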
 .gitignore                     |  1 +
 tests/system-test-kind.sh      | 52 ++++++++++++++++++++++++++++++++++------------------
 tests/test_openai_embedding.py | 29 +++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 18 deletions(-)
 create mode 100755 tests/test_openai_embedding.py

diff --git a/.gitignore b/.gitignore
index ba077a40..6cb0c8a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 bin
+.venv
diff --git a/tests/system-test-kind.sh b/tests/system-test-kind.sh
index aa00b33b..a465c951 100755
--- a/tests/system-test-kind.sh
+++ b/tests/system-test-kind.sh
@@ -11,28 +11,44 @@
 kubectl port-forward svc/proxy-controller 8080:80 &
 
+helm upgrade --install stapi-minilm-l6-v2 substratusai/stapi -f - << EOF
+model: all-MiniLM-L6-v2
+replicaCount: 0
+deploymentAnnotations:
+  lingo.substratus.ai/models: text-embedding-ada-002
+EOF
+
 # need to wait for a bit for the port-forward to be ready
 sleep 5
 
-replicas=$(kubectl get deployment backend -o jsonpath='{.spec.replicas}')
+replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
 if [ "$replicas" -ne 0 ]; then
   echo "Expected 0 replica before sending requests, got $replicas"
   exit 1
 fi
 
-echo "Sending 60 requests to model named backend"
-for i in {1..60}; do
-curl -s -o /dev/null http://localhost:8080/delay/10 \
-  -H "Content-Type: application/json" \
-  -d '{
-    "text": "Your text string goes here",
-    "model": "backend"
-  }' &
-done
-
-sleep 10
-
-replicas=$(kubectl get deployment backend -o jsonpath='{.spec.replicas}')
-if [ "$replicas" -ne 1 ]; then
-  echo "Expected 1 replica after sending less than 100 requests, got $replicas"
-  exit 1
-fi
+SCRIPT_DIR=$(dirname "$0")
+VENV_DIR=$SCRIPT_DIR/.venv
+
+python3 -m venv "$VENV_DIR"
+source "$VENV_DIR/bin/activate"
+pip3 install openai==1.2.3
+
+# Send 60 requests in parallel to the stapi backend using the openai Python client and threading
+python3 "$SCRIPT_DIR/test_openai_embedding.py" --requests 60 --model text-embedding-ada-002
+
+# Ensure the deployment scaled up to 1 replica after sending 60 requests
+replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
+if [ "$replicas" -ne 1 ]; then
+  echo "Expected 1 replica after sending 60 requests, got $replicas"
+  exit 1
+fi
+
+# Send 500 requests in parallel to the stapi backend using the openai Python client and threading
+python3 "$SCRIPT_DIR/test_openai_embedding.py" --requests 500 --model text-embedding-ada-002
+
+# Ensure the deployment scaled up to 2 or more replicas after sending 500 requests
+replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
+if [ "$replicas" -lt 2 ]; then
+  echo "Expected 2 or more replicas after sending 500 requests, got $replicas"
+  exit 1
+fi
diff --git a/tests/test_openai_embedding.py b/tests/test_openai_embedding.py
new file mode 100755
index 00000000..10efa2cb
--- /dev/null
+++ b/tests/test_openai_embedding.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import argparse
+import concurrent.futures
+from openai import OpenAI
+
+parser = argparse.ArgumentParser(description="Test Lingo using Python OpenAI API")
+parser.add_argument("--base-url", type=str, default="http://localhost:8080/v1")
+parser.add_argument("--requests", type=int, default=60)
+parser.add_argument("--model", type=str, default="text-embedding-ada-002")
+parser.add_argument("--text", type=str, default="Generate an embedding for me")
+args = parser.parse_args()
+
+# Lingo ignores the API key, but the client requires one to be set.
+client = OpenAI(
+    api_key="this won't be used",
+    base_url=args.base_url,
+)
+
+def embedding_request(index: int):
+    print(f"Request {index} of {args.requests}")
+    embedding = client.embeddings.create(model=args.model, input=args.text)
+    print(f"Finished {index} of {args.requests}")
+    return embedding
+
+# One worker per request so all requests are in flight simultaneously.
+with concurrent.futures.ThreadPoolExecutor(max_workers=args.requests) as executor:
+    futures = [executor.submit(embedding_request, i + 1) for i in range(args.requests)]
+    results = [future.result() for future in concurrent.futures.as_completed(futures, timeout=600)]
+    assert len(results) == args.requests