From c5af96e7f02121d063baa4acd94da1a4c27f40c7 Mon Sep 17 00:00:00 2001
From: Anna Pendleton
Date: Tue, 14 May 2024 17:06:50 +0000
Subject: [PATCH] fix hardcoded request type

---
 .../locust-load-inference/locust-docker/locust-tasks/run.sh | 5 +++++
 benchmarks/benchmark/tools/locust-load-inference/main.tf    | 1 +
 .../manifest-templates/locust-master-controller.yaml.tpl    | 2 +-
 .../manifest-templates/locust-worker-controller.yaml.tpl    | 2 +-
 .../tools/locust-load-inference/sample-terraform.tfvars     | 2 +-
 5 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/run.sh b/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/run.sh
index 3e1084d0b..f032303df 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/run.sh
+++ b/benchmarks/benchmark/tools/locust-load-inference/locust-docker/locust-tasks/run.sh
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+echo "starting locust for $LOCUST_MODE"
+
 LOCUST="/usr/local/bin/locust"
 LOCUST_OPTS="-f /locust-tasks/tasks.py "
 LOCUST_MODE=${LOCUST_MODE:-standalone}
@@ -35,7 +37,10 @@ if [[ "$LOCUST_MODE" = "master" ]]; then
         LOCUST_OPTS="$LOCUST_OPTS --stop-timeout $STOP_TIMEOUT"
     fi
 elif [[ "$LOCUST_MODE" = "worker" ]]; then
+
+    echo "login to huggingface"
     huggingface-cli login --token $HUGGINGFACE_TOKEN
+
     FILTER_PROMPTS="python /locust-tasks/load_data.py"
     FILTER_PROMPTS_OPTS="--gcs_path=$GCS_PATH --tokenizer=$TOKENIZER --max_prompt_len=$MAX_PROMPT_LEN --max_num_prompts=$MAX_NUM_PROMPTS"
     echo "$FILTER_PROMPTS $FILTER_PROMPTS_OPTS"
diff --git a/benchmarks/benchmark/tools/locust-load-inference/main.tf b/benchmarks/benchmark/tools/locust-load-inference/main.tf
index 12690d02e..5a8a3b03b 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/main.tf
+++ b/benchmarks/benchmark/tools/locust-load-inference/main.tf
@@ -48,6 +48,7 @@ locals {
       use_beam_search                = var.use_beam_search
       hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
       stop_timeout                   = var.stop_timeout
+      request_type                   = var.request_type
     })) : data]
   ])
 }
diff --git a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-master-controller.yaml.tpl b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-master-controller.yaml.tpl
index ff212130e..0b443879a 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-master-controller.yaml.tpl
+++ b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-master-controller.yaml.tpl
@@ -30,7 +30,7 @@ spec:
         - name: STOP_TIMEOUT
           value: ${stop_timeout}
         - name: REQUEST_TYPE
-          value: grpc
+          value: ${request_type}
         ports:
         - name: loc-master-web
           containerPort: 8089
diff --git a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
index 047b6c2ed..b952e4f36 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
+++ b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
@@ -24,7 +24,7 @@ spec:
         - name: LOCUST_MODE
           value: worker
         - name: REQUEST_TYPE
-          value: grpc
+          value: ${request_type}
         - name: LOCUST_MASTER
           value: locust-master
         - name: TARGET_HOST
diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
index dcd6739b4..1db298f30 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
+++ b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
@@ -9,7 +9,7 @@ ksa = "benchmark-ksa"
 
 # Locust service configuration
 artifact_registry                        = "us-central1-docker.pkg.dev/$PROJECT_ID/ai-benchmark"
-inference_server_service                 = "tgi" # inference server service name
+inference_server_service                 = "http://tgi" # inference server service name
 locust_runner_kubernetes_service_account = "sample-runner-ksa"
 output_bucket                            = "benchmark-output"
 gcs_path                                 = "gs://${PROJECT_ID}-ai-gke-benchmark-fuse/ShareGPT_V3_unfiltered_cleaned_split_filtered_prompts.txt"
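Note: main.tf now passes var.request_type into the manifest templates, but variables.tf is not among the five files in this diff, so the variable is assumed to be declared there already. A minimal sketch of what such a declaration could look like (the description text and the default are assumptions, not shown in this patch; a default of "grpc" would preserve the value previously hardcoded in the templates):

variable "request_type" {
  description = "Protocol the Locust tasks use to call the inference server."
  type        = string
  default     = "grpc"
}

With a declaration like this in place, the request type can be overridden per run in sample-terraform.tfvars, e.g.:

request_type = "grpc"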