Skip to content

Commit

Permalink
add python openai multithread system test
Browse files Browse the repository at this point in the history
  • Loading branch information
samos123 committed Nov 11, 2023
1 parent b336066 commit 7f341a8
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
bin
.venv
39 changes: 25 additions & 14 deletions tests/system-test-kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,49 @@
# NOTE(review): this span is a GitHub diff rendering — removed and added
# versions of some lines appear back to back, so it is not runnable as-is.
set -e

# Throwaway kind cluster for the system test.
kind create cluster --name=substratus-test
# The line below is the pre-commit version (cleanup trap active); the commit
# replaces it with the commented-out form that follows, so the kind cluster
# now leaks after the test — presumably left for debugging; confirm intended.
trap "kind delete cluster --name=substratus-test" EXIT
# trap "kind delete cluster --name=substratus-test" EXIT

# Build and deploy the controller under test.
skaffold run

kubectl wait --for=condition=available --timeout=30s deployment/proxy-controller

# Background port-forward so the test can reach the proxy on localhost:8080.
kubectl port-forward svc/proxy-controller 8080:80 &

# Install the stapi embedding backend at 0 replicas; the lingo annotation
# presumably maps model "text-embedding-ada-002" to this deployment — verify.
# (No comments inside the heredoc: they would become chart values.)
helm upgrade --install stapi-minilm-l6-v2 substratusai/stapi -f - << EOF
model: all-MiniLM-L6-v2
replicaCount: 0
deploymentAnnotations:
lingo.substratus.ai/models: text-embedding-ada-002
EOF

# need to wait for a bit for the port-forward to be ready
sleep 5

# Old line (deployment "backend") vs new line (deployment
# "stapi-minilm-l6-v2") — the diff shows both; only the second is current.
replicas=$(kubectl get deployment backend -o jsonpath='{.spec.replicas}')
replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
# Scale-to-zero precondition: no replicas before any traffic has been sent.
if [ "$replicas" -ne 0 ]; then
echo "Expected 0 replica before sending requests, got $replicas"
exit 1
fi

# Old (removed) load generator: 60 raw curl requests in background jobs.
echo "Sending 60 requests to model named backend"
for i in {1..60}; do
curl -s -o /dev/null http://localhost:8080/delay/10 \
-H "Content-Type: application/json" \
-d '{
"text": "Your text string goes here",
"model": "backend"
}' &
done
# New load generator: local venv + OpenAI-client script with threads.
SCRIPT_DIR=$(dirname "$0")
VENV_DIR=$SCRIPT_DIR/.venv

python3 -m venv "$VENV_DIR"
source "$VENV_DIR/bin/activate"
pip3 install openai==1.2.3

sleep 10
# Send 60 requests in parallel to stapi backend using openai python client and threading
python3 $SCRIPT_DIR/test_openai_embedding.py --requests 60 --model text-embedding-ada-002

# Ensure replicas has been scaled up to 1 after sending 60 requests
# NOTE(review): queries deployment "backend", but the chart installed above is
# "stapi-minilm-l6-v2" — this looks like a stale name; confirm which
# deployment the autoscaler actually scales.
replicas=$(kubectl get deployment backend -o jsonpath='{.spec.replicas}')

if [ "$replicas" -ne 1 ]; then
echo "Expected 1 replica after sending less than 100 requests, got $replicas"
# Send 500 requests in parallel to stapi backend using openai python client and threading
# NOTE(review): SCRIPT_DIR recomputed — redundant with the assignment above.
SCRIPT_DIR=$(dirname "$0")
python3 $SCRIPT_DIR/test_openai_embedding.py --requests 500 --model text-embedding-ada-002


# NOTE(review): two problems here: (1) $replicas was read BEFORE the
# 500-request run and is never refreshed, so this checks a stale value;
# (2) the condition is inverted — "-ge 2" plus "exit 1" fails the script
# exactly when the expected scale-up happened. Should re-query replicas and
# use "-lt 2".
if [ "$replicas" -ge 2 ]; then
echo "Expected 2 or more replicas after sending more than 500 requests, got $replicas"
exit 1
fi
26 changes: 26 additions & 0 deletions tests/test_openai_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Load-test a Lingo/stapi embedding backend via the OpenAI Python client.

Fires ``--requests`` embedding calls in parallel (one thread per request)
against ``--base-url`` and asserts that every call completes within the
overall timeout.  Any failure raises, so the process exits non-zero, which
is what the calling shell script relies on.
"""
import argparse
import concurrent.futures

from openai import OpenAI


def _parse_args() -> argparse.Namespace:
    """Parse CLI flags; defaults match the kind-based system test."""
    parser = argparse.ArgumentParser(description="Test Lingo using Python OpenAI API")
    parser.add_argument("--base-url", type=str, default="http://localhost:8080/v1")
    parser.add_argument("--requests", type=int, default=60)
    parser.add_argument("--model", type=str, default="text-embedding-ada-002")
    parser.add_argument("--text", type=str, default="Generate an embedding for me")
    return parser.parse_args()


def main() -> None:
    """Run the parallel embedding load test and assert all requests finish."""
    args = _parse_args()
    client = OpenAI(
        # The proxy does not check credentials, but the client requires a key.
        api_key="this won't be used",
        base_url=args.base_url,
    )

    def embedding_request(index: int):
        """Issue one embedding call; returns the client's response object."""
        print(f"Request {index} of {args.requests}")
        embedding = client.embeddings.create(model=args.model, input=args.text)
        print(f"Finished {index} of {args.requests}")
        return embedding

    # One worker per request so every call is in flight simultaneously —
    # that concurrent load is what should trigger the backend scale-up.
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.requests) as executor:
        futures = [
            executor.submit(embedding_request, i + 1) for i in range(args.requests)
        ]
        # as_completed raises TimeoutError if the batch exceeds 600s overall;
        # .result() re-raises any per-request exception.
        results = [
            future.result()
            for future in concurrent.futures.as_completed(futures, timeout=600)
        ]
    assert len(results) == args.requests


# Guard so importing this module has no side effects (the original ran
# argparse and all network traffic at import time).
if __name__ == "__main__":
    main()

0 comments on commit 7f341a8

Please sign in to comment.