Skip to content

Commit

Permalink
Upload results json output to GCS bucket (#871)
Browse files Browse the repository at this point in the history
* first commit

* nits

* nit
  • Loading branch information
Bslabe123 authored Nov 4, 2024
1 parent b2889c6 commit 6d66bdd
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import google.auth
import google.auth.transport.requests
from google.cloud import storage

import aiohttp
import numpy as np
Expand Down Expand Up @@ -47,6 +48,10 @@ async def on_request_end(session, trace_config_ctx, params):
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_end.append(on_request_end)

# Google Cloud Storage client and target bucket for uploading JSON results.
# Both remain None unless --output-bucket is given; they are initialized in
# main() before benchmarking starts (so misconfigured credentials fail fast)
# and consumed by save_json_results() to upload the results file.
gcs_client = None
gcs_bucket = None

def sample_requests(
dataset_path: str,
num_requests: int,
Expand Down Expand Up @@ -337,7 +342,6 @@ async def benchmark(
print_and_save_result(args, benchmark_duration, len(input_requests), model, combined_latencies, combined_errors)
return combined_latencies, combined_errors


def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics, model, errors):
# Setup
start_dt_proto = Timestamp()
Expand Down Expand Up @@ -427,6 +431,9 @@ def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics
)
with open(file_name, "w", encoding="utf-8") as outfile:
json.dump(final_json, outfile)
if gcs_bucket is not None:
gcs_bucket.blob(f"{args.output_bucket_filepath}/{file_name}").upload_from_filename(file_name)
print(f"File {file_name} uploaded to gs://{args.output_bucket}/{args.output_bucket_filepath}")

def metrics_to_scrape(backend: str) -> List[str]:
# Each key in the map is a metric, it has a corresponding 'stats' object
Expand Down Expand Up @@ -610,6 +617,19 @@ async def main(args: argparse.Namespace):
if args.backend == "vllm"
else args.endpoint
)

# Create GCS client before benchmarking
# Should fail fast if client is misconfigured or missing permissions
if args.output_bucket is not None:
global gcs_client
gcs_client = storage.Client()
global gcs_bucket
gcs_bucket = gcs_client.bucket(args.output_bucket)

if args.output_bucket_filepath:
blob = gcs_bucket.blob(args.output_bucket_filepath)
if not blob.exists():
blob.upload_from_string('')

print(f"Starting Prometheus Server on port {PROMETHEUS_PORT}")
start_http_server(PROMETHEUS_PORT)
Expand Down Expand Up @@ -759,6 +779,27 @@ async def main(args: argparse.Namespace):
action="store_true",
help="Whether to save benchmark results to a json file.",
)
# CLI flags controlling the optional upload of JSON results to GCS.
# --output-bucket enables the upload; --output-bucket-filepath picks the
# destination path inside that bucket (root of the bucket when omitted).
parser.add_argument(
    "--output-bucket",
    type=str,
    default=None,
    help=(
        "Specifies the Google Cloud Storage bucket to which JSON-format results"
        " will be uploaded. If not provided, no upload will occur."
    ),
)
parser.add_argument(
    "--output-bucket-filepath",
    type=str,
    default=None,
    help=(
        "Specifies the destination path within the bucket provided by"
        " --output-bucket for uploading the JSON results. This argument requires"
        " --output-bucket to be set. If not specified, results will be uploaded"
        # fixed: "doesnt" typo and a doubled space in the original help text
        " to the root of the bucket. If the filepath doesn't exist, it will be"
        " created for you."
    ),
)
parser.add_argument(
"--save-aggregated-result",
action="store_true",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do
num_prompts=$(awk "BEGIN {print int($request_rate * $BENCHMARK_TIME_SECONDS)}")
fi
echo "TOTAL prompts: $num_prompts" # Output: 8
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS"
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --output-bucket=$OUTPUT_BUCKET --output-bucket-filepath $OUTPUT_BUCKET_FILEPATH --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS"
if [[ "$SCRAPE_SERVER_METRICS" = "true" ]]; then
PYTHON_OPTS="$PYTHON_OPTS --scrape-server-metrics"
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ pynvml == 11.5.0
accelerate
aiohttp
google-auth
google-cloud-storage >= 2.18.2
prometheus_client >= 0.21.0
17 changes: 10 additions & 7 deletions benchmarks/benchmark/tools/profile-generator/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,16 @@ module "latency-profile" {
port = var.targets.manual.service_port
}
}
prompt_dataset = var.prompt_dataset
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = var.request_rates
benchmark_time_seconds = var.benchmark_time_seconds
output_bucket = var.output_bucket
prompt_dataset = var.prompt_dataset
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = var.request_rates
benchmark_time_seconds = var.benchmark_time_seconds
gcs_output = {
bucket = var.output_bucket
filepath = var.output_bucket_filepath
}
latency_profile_kubernetes_service_account = var.latency_profile_kubernetes_service_account
k8s_hf_secret = var.k8s_hf_secret
hugging_face_secret = var.hugging_face_secret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ resource "kubernetes_manifest" "latency-profile-generator" {
request_rates = join(",", [for number in var.request_rates : tostring(number)])
hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
k8s_hf_secret_list = var.k8s_hf_secret == null ? [] : [var.k8s_hf_secret]
output_bucket = var.output_bucket
output_bucket = var.gcs_output.bucket
output_bucket_filepath = var.gcs_output.filepath
scrape_server_metrics = var.scrape_server_metrics
file_prefix = var.file_prefix
save_aggregated_result = var.save_aggregated_result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ spec:
value: ${benchmark_time_seconds}
- name: OUTPUT_BUCKET
value: ${output_bucket}
- name: OUTPUT_BUCKET_FILEPATH
value: ${output_bucket_filepath}
- name: SCRAPE_SERVER_METRICS
value: ${scrape_server_metrics}
- name: MAX_NUM_PROMPTS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,13 @@ variable "models" {
default = "tiiuae/falcon-7b"
}

variable "output_bucket" {
description = "Bucket name for storing results"
type = string
variable "gcs_output" {
description = "Bucket name and filepath for storing json results, if filepath not specified, results uploaded to root of bucket"
type = object({
bucket = string
filepath = optional(string)
})
nullable = true
}

variable "latency_profile_kubernetes_service_account" {
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/benchmark/tools/profile-generator/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ variable "output_bucket" {
type = string
}

variable "output_bucket_filepath" {
description = "Where in bucket to store json results, will upload to root of bucket if not specified"
type = string
nullable = true
}

variable "latency_profile_kubernetes_service_account" {
description = "Kubernetes Service Account to be used for the latency profile generator tool"
type = string
Expand Down

0 comments on commit 6d66bdd

Please sign in to comment.