Skip to content

Commit

Permalink
Upload results json output to GCS bucket (#871)
Browse files Browse the repository at this point in the history
* first commit

* nits

* nit
  • Loading branch information
Bslabe123 authored Nov 4, 2024
1 parent b2889c6 commit 6d66bdd
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import google.auth
import google.auth.transport.requests
from google.cloud import storage

import aiohttp
import numpy as np
Expand Down Expand Up @@ -47,6 +48,10 @@ async def on_request_end(session, trace_config_ctx, params):
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_end.append(on_request_end)

# Google Cloud Storage client and target bucket for uploading JSON results.
# Both remain None unless --output-bucket is given; they are initialized in
# main() before benchmarking starts (so misconfigured credentials fail fast)
# and consumed by save_json_results() to upload the results file.
gcs_client = None
gcs_bucket = None

def sample_requests(
dataset_path: str,
num_requests: int,
Expand Down Expand Up @@ -337,7 +342,6 @@ async def benchmark(
print_and_save_result(args, benchmark_duration, len(input_requests), model, combined_latencies, combined_errors)
return combined_latencies, combined_errors


def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics, model, errors):
# Setup
start_dt_proto = Timestamp()
Expand Down Expand Up @@ -427,6 +431,9 @@ def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics
)
with open(file_name, "w", encoding="utf-8") as outfile:
json.dump(final_json, outfile)
if gcs_bucket is not None:
gcs_bucket.blob(f"{args.output_bucket_filepath}/{file_name}").upload_from_filename(file_name)
print(f"File {file_name} uploaded to gs://{args.output_bucket}/{args.output_bucket_filepath}")

def metrics_to_scrape(backend: str) -> List[str]:
# Each key in the map is a metric, it has a corresponding 'stats' object
Expand Down Expand Up @@ -610,6 +617,19 @@ async def main(args: argparse.Namespace):
if args.backend == "vllm"
else args.endpoint
)

# Create GCS client before benchmarking
# Should fail fast if client is misconfigured or missing permissions
if args.output_bucket is not None:
global gcs_client
gcs_client = storage.Client()
global gcs_bucket
gcs_bucket = gcs_client.bucket(args.output_bucket)

if args.output_bucket_filepath:
blob = gcs_bucket.blob(args.output_bucket_filepath)
if not blob.exists():
blob.upload_from_string('')

print(f"Starting Prometheus Server on port {PROMETHEUS_PORT}")
start_http_server(PROMETHEUS_PORT)
Expand Down Expand Up @@ -759,6 +779,27 @@ async def main(args: argparse.Namespace):
action="store_true",
help="Whether to save benchmark results to a json file.",
)
# CLI flags controlling the optional upload of JSON results to GCS.
# --output-bucket enables the upload; --output-bucket-filepath picks the
# destination path inside that bucket (root of the bucket when omitted).
parser.add_argument(
    "--output-bucket",
    type=str,
    default=None,
    help=(
        "Specifies the Google Cloud Storage bucket to which JSON-format results"
        " will be uploaded. If not provided, no upload will occur."
    ),
)
parser.add_argument(
    "--output-bucket-filepath",
    type=str,
    default=None,
    help=(
        "Specifies the destination path within the bucket provided by"
        " --output-bucket for uploading the JSON results. This argument requires"
        " --output-bucket to be set. If not specified, results will be uploaded"
        # fixed: "doesnt" typo and a doubled space in the original help text
        " to the root of the bucket. If the filepath doesn't exist, it will be"
        " created for you."
    ),
)
parser.add_argument(
"--save-aggregated-result",
action="store_true",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do
num_prompts=$(awk "BEGIN {print int($request_rate * $BENCHMARK_TIME_SECONDS)}")
fi
echo "TOTAL prompts: $num_prompts" # Output: 8
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS"
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --output-bucket=$OUTPUT_BUCKET --output-bucket-filepath $OUTPUT_BUCKET_FILEPATH --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS"
if [[ "$SCRAPE_SERVER_METRICS" = "true" ]]; then
PYTHON_OPTS="$PYTHON_OPTS --scrape-server-metrics"
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ pynvml == 11.5.0
accelerate
aiohttp
google-auth
google-cloud-storage >= 2.18.2
prometheus_client >= 0.21.0
17 changes: 10 additions & 7 deletions benchmarks/benchmark/tools/profile-generator/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,16 @@ module "latency-profile" {
port = var.targets.manual.service_port
}
}
prompt_dataset = var.prompt_dataset
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = var.request_rates
benchmark_time_seconds = var.benchmark_time_seconds
output_bucket = var.output_bucket
prompt_dataset = var.prompt_dataset
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = var.request_rates
benchmark_time_seconds = var.benchmark_time_seconds
gcs_output = {
bucket = var.output_bucket
filepath = var.output_bucket_filepath
}
latency_profile_kubernetes_service_account = var.latency_profile_kubernetes_service_account
k8s_hf_secret = var.k8s_hf_secret
hugging_face_secret = var.hugging_face_secret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ resource "kubernetes_manifest" "latency-profile-generator" {
request_rates = join(",", [for number in var.request_rates : tostring(number)])
hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
k8s_hf_secret_list = var.k8s_hf_secret == null ? [] : [var.k8s_hf_secret]
output_bucket = var.output_bucket
output_bucket = var.gcs_output.bucket
output_bucket_filepath = var.gcs_output.filepath
scrape_server_metrics = var.scrape_server_metrics
file_prefix = var.file_prefix
save_aggregated_result = var.save_aggregated_result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ spec:
value: ${benchmark_time_seconds}
- name: OUTPUT_BUCKET
value: ${output_bucket}
- name: OUTPUT_BUCKET_FILEPATH
value: ${output_bucket_filepath}
- name: SCRAPE_SERVER_METRICS
value: ${scrape_server_metrics}
- name: MAX_NUM_PROMPTS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,13 @@ variable "models" {
default = "tiiuae/falcon-7b"
}

variable "output_bucket" {
description = "Bucket name for storing results"
type = string
variable "gcs_output" {
description = "Bucket name and filepath for storing json results, if filepath not specified, results uploaded to root of bucket"
type = object({
bucket = string
filepath = optional(string)
})
nullable = true
}

variable "latency_profile_kubernetes_service_account" {
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/benchmark/tools/profile-generator/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ variable "output_bucket" {
type = string
}

variable "output_bucket_filepath" {
description = "Where in bucket to store json results, will upload to root of bucket if not specified"
type = string
nullable = true
}

variable "latency_profile_kubernetes_service_account" {
description = "Kubernetes Service Account to be used for the latency profile generator tool"
type = string
Expand Down

0 comments on commit 6d66bdd

Please sign in to comment.