From 7f341a8b59ddfc0d8e77ad7f9f9a4d5e1ab82565 Mon Sep 17 00:00:00 2001
From: Sam Stoelinga
Date: Sat, 11 Nov 2023 10:10:42 -0800
Subject: [PATCH] add python openai multithread system test

---
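This replaces the curl-based load loop with a small Python script that
drives Lingo through the official OpenAI client, so the test exercises
the same request path a real client would. A sketch of running the load
generator on its own, assuming the proxy is already port-forwarded to
localhost:8080 and the pinned openai==1.2.3 package is installed:

    python3 tests/test_openai_embedding.py \
        --base-url http://localhost:8080/v1 \
        --requests 60 \
        --model text-embedding-ada-002
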
 .gitignore                     |  1 +
 tests/system-test-kind.sh      | 52 ++++++++++++++++++++++++++++++++++------------------
 tests/test_openai_embedding.py | 29 +++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 18 deletions(-)
 create mode 100755 tests/test_openai_embedding.py

diff --git a/.gitignore b/.gitignore
index ba077a40..6cb0c8a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 bin
+.venv
diff --git a/tests/system-test-kind.sh b/tests/system-test-kind.sh
index aa00b33b..a465c951 100755
--- a/tests/system-test-kind.sh
+++ b/tests/system-test-kind.sh
@@ -11,28 +11,44 @@
 kubectl port-forward svc/proxy-controller 8080:80 &
 
+helm upgrade --install stapi-minilm-l6-v2 substratusai/stapi -f - << EOF
+model: all-MiniLM-L6-v2
+replicaCount: 0
+deploymentAnnotations:
+  lingo.substratus.ai/models: text-embedding-ada-002
+EOF
+
 # need to wait for a bit for the port-forward to be ready
 sleep 5
 
-replicas=$(kubectl get deployment backend -o jsonpath='{.spec.replicas}')
+replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
 if [ "$replicas" -ne 0 ]; then
   echo "Expected 0 replica before sending requests, got $replicas"
   exit 1
 fi
 
-echo "Sending 60 requests to model named backend"
-for i in {1..60}; do
-curl -s -o /dev/null http://localhost:8080/delay/10 \
-  -H "Content-Type: application/json" \
-  -d '{
-    "text": "Your text string goes here",
-    "model": "backend"
-  }' &
-done
-
-sleep 10
-
-replicas=$(kubectl get deployment backend -o jsonpath='{.spec.replicas}')
-if [ "$replicas" -ne 1 ]; then
-  echo "Expected 1 replica after sending less than 100 requests, got $replicas"
-  exit 1
-fi
+SCRIPT_DIR=$(dirname "$0")
+VENV_DIR=$SCRIPT_DIR/.venv
+
+python3 -m venv "$VENV_DIR"
+source "$VENV_DIR/bin/activate"
+pip3 install openai==1.2.3
+
+# Send 60 requests in parallel to the stapi backend using the openai Python client and threading
+python3 "$SCRIPT_DIR/test_openai_embedding.py" --requests 60 --model text-embedding-ada-002
+
+# Ensure the deployment scaled up to 1 replica after sending 60 requests
+replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
+if [ "$replicas" -ne 1 ]; then
+  echo "Expected 1 replica after sending 60 requests, got $replicas"
+  exit 1
+fi
+
+# Send 500 requests in parallel to the stapi backend using the openai Python client and threading
+python3 "$SCRIPT_DIR/test_openai_embedding.py" --requests 500 --model text-embedding-ada-002
+
+# Ensure the deployment scaled up to 2 or more replicas after sending 500 requests
+replicas=$(kubectl get deployment stapi-minilm-l6-v2 -o jsonpath='{.spec.replicas}')
+if [ "$replicas" -lt 2 ]; then
+  echo "Expected 2 or more replicas after sending 500 requests, got $replicas"
+  exit 1
+fi
diff --git a/tests/test_openai_embedding.py b/tests/test_openai_embedding.py
new file mode 100755
index 00000000..10efa2cb
--- /dev/null
+++ b/tests/test_openai_embedding.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import argparse
+import concurrent.futures
+from openai import OpenAI
+
+parser = argparse.ArgumentParser(description="Test Lingo using Python OpenAI API")
+parser.add_argument("--base-url", type=str, default="http://localhost:8080/v1")
+parser.add_argument("--requests", type=int, default=60)
+parser.add_argument("--model", type=str, default="text-embedding-ada-002")
+parser.add_argument("--text", type=str, default="Generate an embedding for me")
+args = parser.parse_args()
+
+# Lingo ignores the API key, but the client requires one to be set.
+client = OpenAI(
+    api_key="this won't be used",
+    base_url=args.base_url,
+)
+
+def embedding_request(index: int):
+    print(f"Request {index} of {args.requests}")
+    embedding = client.embeddings.create(model=args.model, input=args.text)
+    print(f"Finished {index} of {args.requests}")
+    return embedding
+
+# One worker per request so all requests are in flight simultaneously.
+with concurrent.futures.ThreadPoolExecutor(max_workers=args.requests) as executor:
+    futures = [executor.submit(embedding_request, i + 1) for i in range(args.requests)]
+    results = [future.result() for future in concurrent.futures.as_completed(futures, timeout=600)]
+    assert len(results) == args.requests