Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCS Bucket upload support #871

Merged
merged 9 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import google.auth
import google.auth.transport.requests
from google.cloud import storage

import aiohttp
import numpy as np
Expand Down Expand Up @@ -47,6 +48,10 @@ async def on_request_end(session, trace_config_ctx, params):
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_end.append(on_request_end)

# Google Cloud Storage Client
gcs_client = None
gcs_bucket = None

def sample_requests(
dataset_path: str,
num_requests: int,
Expand Down Expand Up @@ -337,7 +342,6 @@ async def benchmark(
print_and_save_result(args, benchmark_duration, len(input_requests), model, combined_latencies, combined_errors)
return combined_latencies, combined_errors


def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics, model, errors):
# Setup
start_dt_proto = Timestamp()
Expand Down Expand Up @@ -427,6 +431,9 @@ def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics
)
with open(file_name, "w", encoding="utf-8") as outfile:
json.dump(final_json, outfile)
if gcs_bucket is not None:
gcs_bucket.blob(f"{args.output_bucket_filepath}/{file_name}").upload_from_filename(file_name)
print(f"File {file_name} uploaded to gs://{args.output_bucket}/{args.output_bucket_filepath}")

def metrics_to_scrape(backend: str) -> List[str]:
# Each key in the map is a metric, it has a corresponding 'stats' object
Expand Down Expand Up @@ -610,6 +617,19 @@ async def main(args: argparse.Namespace):
if args.backend == "vllm"
else args.endpoint
)

# Create GCS client before benchmarking
# Should fail fast if client is misconfigured or missing permissions
if args.output_bucket is not None:
global gcs_client
gcs_client = storage.Client()
global gcs_bucket
gcs_bucket = gcs_client.bucket(args.output_bucket)

if args.output_bucket_filepath:
blob = gcs_bucket.blob(args.output_bucket_filepath)
if not blob.exists():
annapendleton marked this conversation as resolved.
Show resolved Hide resolved
blob.upload_from_string('')

print(f"Starting Prometheus Server on port {PROMETHEUS_PORT}")
start_http_server(PROMETHEUS_PORT)
Expand Down Expand Up @@ -759,6 +779,26 @@ async def main(args: argparse.Namespace):
action="store_true",
help="Whether to save benchmark results to a json file.",
)
# Optional GCS destination for the JSON results. main() only creates a
# storage client (and bucket handle) when --output-bucket is provided.
parser.add_argument(
    "--output-bucket",
    type=str,
    default=None,
    help=(
        "Specifies the Google Cloud Storage bucket to which JSON-format results"
        " will be uploaded. If not provided, no upload will occur."
    ),
)
# Path prefix inside the bucket given by --output-bucket.
# NOTE(review): the upload code names the blob
# f"{args.output_bucket_filepath}/{file_name}", so leaving this at its None
# default would produce a literal "None/" prefix rather than the bucket
# root promised below -- confirm the upload path handles the unset case.
parser.add_argument(
    "--output-bucket-filepath",
    type=str,
    default=None,
    help=(
        # Adjacent literals are concatenated; keep exactly one space at each
        # seam so the rendered --help text has no doubled spaces.
        "Specifies the destination path within the bucket provided by"
        " --output-bucket for uploading the JSON results. This argument requires"
        " --output-bucket to be set. If not specified, results will be uploaded"
        " to the root of the bucket."
    ),
)
parser.add_argument(
"--save-aggregated-result",
action="store_true",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do
num_prompts=$(awk "BEGIN {print int($request_rate * $BENCHMARK_TIME_SECONDS)}")
fi
echo "TOTAL prompts: $num_prompts" # Output: 8
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS"
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --output-bucket=$OUTPUT_BUCKET --output-bucket-filepath $OUTPUT_BUCKET_FILEPATH --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS"
if [[ "$SCRAPE_SERVER_METRICS" = "true" ]]; then
PYTHON_OPTS="$PYTHON_OPTS --scrape-server-metrics"
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ pynvml == 11.5.0
accelerate
aiohttp
google-auth
google-cloud-storage >= 2.18.2
prometheus_client >= 0.21.0
17 changes: 10 additions & 7 deletions benchmarks/benchmark/tools/profile-generator/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,16 @@ module "latency-profile" {
port = var.targets.manual.service_port
}
}
prompt_dataset = var.prompt_dataset
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = var.request_rates
benchmark_time_seconds = var.benchmark_time_seconds
output_bucket = var.output_bucket
prompt_dataset = var.prompt_dataset
max_num_prompts = var.max_num_prompts
max_output_len = var.max_output_len
max_prompt_len = var.max_prompt_len
request_rates = var.request_rates
benchmark_time_seconds = var.benchmark_time_seconds
gcs_output = {
bucket = var.output_bucket
filepath = var.output_bucket_filepath
}
latency_profile_kubernetes_service_account = var.latency_profile_kubernetes_service_account
k8s_hf_secret = var.k8s_hf_secret
hugging_face_secret = var.hugging_face_secret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ resource "kubernetes_manifest" "latency-profile-generator" {
request_rates = join(",", [for number in var.request_rates : tostring(number)])
hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
k8s_hf_secret_list = var.k8s_hf_secret == null ? [] : [var.k8s_hf_secret]
output_bucket = var.output_bucket
output_bucket = var.gcs_output.bucket
output_bucket_filepath = var.gcs_output.filepath
scrape_server_metrics = var.scrape_server_metrics
file_prefix = var.file_prefix
save_aggregated_result = var.save_aggregated_result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ spec:
value: ${benchmark_time_seconds}
- name: OUTPUT_BUCKET
value: ${output_bucket}
- name: OUTPUT_BUCKET_FILEPATH
value: ${output_bucket_filepath}
- name: SCRAPE_SERVER_METRICS
value: ${scrape_server_metrics}
- name: MAX_NUM_PROMPTS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,13 @@ variable "models" {
default = "tiiuae/falcon-7b"
}

variable "output_bucket" {
description = "Bucket name for storing results"
type = string
# GCS destination for the JSON results. `bucket` is required; `filepath` is
# declared with optional(), so callers may omit it from the object.
variable "gcs_output" {
  type = object({
    bucket   = string
    filepath = optional(string)
  })
  nullable    = true
  description = "Bucket name and filepath for storing json results, if filepath not specified, results uploaded to root of bucket"
}

variable "latency_profile_kubernetes_service_account" {
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/benchmark/tools/profile-generator/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ variable "output_bucket" {
type = string
}

# Optional path inside the output bucket under which JSON results are stored.
variable "output_bucket_filepath" {
  description = "Where in bucket to store json results, will upload to root of bucket if not specified"
  type        = string
  nullable    = true
  # Without a default Terraform treats the variable as required, which
  # contradicts the "if not specified" wording above. Defaulting to null
  # makes it genuinely optional.
  # NOTE(review): confirm downstream consumers of this value accept null.
  default = null
}

variable "latency_profile_kubernetes_service_account" {
description = "Kubernetes Service Account to be used for the latency profile generator tool"
type = string
Expand Down