Aligning Benchmarking and PyBuda repositories.
vcanicTT committed Apr 26, 2024
1 parent 8251137 commit 1839b59
Showing 23 changed files with 407 additions and 199 deletions.
36 changes: 35 additions & 1 deletion benchmark.py
@@ -57,6 +57,32 @@
logger.setLevel(logging.INFO)


def print_benchmark_envs():

    # If requested via an environment variable, print all environment variables.
    # It can be useful in CI jobs to get the state of the environment variables before the test session starts.
    print_env_variables = bool(int(os.environ.get("PYTEST_PRINT_ENV_VARIABLES", "0")))
    if print_env_variables:
        pybuda_specific_vars = {}
        tt_backend_specific_vars = {}
        print(f"####### Environment variables - Count: {len(os.environ)} #######")
        for key, value in os.environ.items():
            print(f"{key}={value}")
            if key.startswith("PYBUDA_") or key.startswith("GOLDEN_"):
                pybuda_specific_vars[key] = value
            elif key.startswith("TT_BACKEND_"):
                tt_backend_specific_vars[key] = value

        print(f"####### PYBUDA specific environment variables - Count: {len(pybuda_specific_vars)} #######")
        for key, value in pybuda_specific_vars.items():
            print(f"{key}={value}")

        print(f"####### TT_BACKEND specific environment variables - Count: {len(tt_backend_specific_vars)} #######")
        for key, value in tt_backend_specific_vars.items():
            print(f"{key}={value}")



def run(
    args,
    model: Any,
@@ -169,14 +195,19 @@ def run(
print(f"Pybuda successfully compiled model to: {args.save_tti}")
exit(0)

if "verify_cfg" in model.keys():
verify_cfg = model["verify_cfg"]
else:
verify_cfg = pybuda.verify.VerifyConfig(verify_pybuda_codegen_vs_framework=True)

# Compilation run
monitor_thread = threading.Thread(target=benchmark_run.cpu_usage_monitor)
monitor_thread.start()
benchmark_run.start_compilation_timer()
output_q = pybuda.initialize_pipeline(
training=args.training,
sample_inputs=sample_inputs,
_verify_cfg=pybuda.verify.VerifyConfig(verify_pybuda_codegen_vs_framework=True),
_verify_cfg=verify_cfg,
sample_targets=targets,
)
benchmark_run.stop_monitoring = True
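
With this change a model entry can carry its own verification settings instead of the previously hard-coded codegen-vs-framework check. A sketch of what such an entry might look like (the "verify_cfg" key is from this commit; the dict shape and the module object are assumptions):

    model = {
        "model": pybuda_module,  # hypothetical object built earlier by the model function
        "verify_cfg": pybuda.verify.VerifyConfig(verify_pybuda_codegen_vs_framework=False),
    }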
@@ -330,6 +361,9 @@ def pop_outputs_thread(output_q):


if __name__ == "__main__":

    print_benchmark_envs()

    # Arguments
    parser = argparse.ArgumentParser(description="Benchmark a model on TT hardware")
    parser.add_argument("-m", "--model", help="Model to benchmark (i.e. bert)")
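
For reference, the model selector is the only argument visible in this hunk; a quick sketch of the resulting parse (any other flags are elided in the diff and not reproduced here):

    args = parser.parse_args(["-m", "bert"])
    assert args.model == "bert"  # argparse derives the attribute name from "--model"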
13 changes: 6 additions & 7 deletions benchmark/models/bert/bert.py
@@ -27,17 +27,16 @@ def bert(training: bool, task: str, config: str, microbatch: int, device: str, d
os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
os.environ["PYBUDA_EXP_APPROX"] = "1"
if data_type == "Bfp8_b":
if pybuda.detect_available_devices()[0] != BackendDevice.Grayskull:
os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
os.environ["PYBUDA_EXP_APPROX"] = "1"
pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
pybuda.config.configure_mixed_precision(op_type="subtract", output_df=pybuda.DataFormat.Float16_b)
pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)

available_devices = pybuda.detect_available_devices()
if available_devices[0] == BackendDevice.Grayskull:
os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{18*1024}"
if config == "large":
pybuda.config.override_op_size("gelu_103", (3, 1))
if data_type == "Fp16_b":
os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1" #overlay blob issue on bfp8
os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10"

# Set model parameters based on chosen task and model configuration
if task == "na":
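
A small aside on the Grayskull branch above: the f-string just bakes the arithmetic into a decimal string before it lands in the environment.

    assert f"{18*1024}" == "18432"  # 18 KiB of extra overlay blob budget, as a plain string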
22 changes: 13 additions & 9 deletions benchmark/models/deit/deit.py
@@ -24,18 +24,22 @@ def deit(training: bool, task: str, config: str, microbatch: int, device: str, d
        compiler_cfg.balancer_policy = "Ribbon"
        os.environ["PYBUDA_RIBBON2"] = "1"

    # These are about to be enabled by default.
    #
    if data_type != "Bfp8_b":
        os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
        os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
        os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
        os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    # These are about to be enabled by default.
    #
    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
    os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
    os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_RIBBON2_CONSERVATIVE_OPTIMIZATION_ITERATIONS"] = "10"

    if data_type == "Fp16_b":
        os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES_APPLY_FILTERING"] = "1"
        os.environ["PYBUDA_TEMP_DISABLE_MODEL_KB_PROLOGUE_BW"] = "1"

    if data_type == "Bfp8_b":
        os.environ["PYBUDA_FORK_JOIN_BUF_QUEUES"] = "1"
        pybuda.config.configure_mixed_precision(op_type="reciprocal", output_df=pybuda.DataFormat.Float16_b)
        os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10"
        os.environ["PYBUDA_TEMP_BALANCER_MODEL_PCIE_BW"] = "0"
        os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"

    # Set model parameters based on chosen task and model configuration
    img_res = 224
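
Note that these overrides clobber any values already present in the caller's environment. Where a user-supplied setting should win, a setdefault-style guard is the usual alternative (the hrnet file below uses an explicit membership check for the same purpose); this line is a sketch, not code from the repo:

    os.environ.setdefault("PYBUDA_RIBBON2_CONSERVATIVE_OPTIMIZATION_ITERATIONS", "10")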
15 changes: 15 additions & 0 deletions benchmark/models/falcon/falcon.py
@@ -22,6 +22,21 @@ def falcon(
    training: bool, task: str, config: str, microbatch: int, device: str, data_type: str, benchmark_run: BenchmarkRun
):

    import os
    import pybuda
    compiler_cfg = pybuda.config._get_global_compiler_config()

    if compiler_cfg.balancer_policy == "default":
        compiler_cfg.balancer_policy = "Ribbon"
        os.environ["PYBUDA_RIBBON2"] = "1"

    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
    os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
    os.environ["PYBUDA_EXP_APPROX"] = "1"
    os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "233472"

    # Set model parameters based on chosen task and model configuration
    if task in ["na", "hellaswag", "text_summarization", "alpacaeval"]:
        if config == "7b":
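
Unlike the other files, falcon hard-codes the overlay blob budget as a literal. For the record, the value is the same KiB-style arithmetic written out:

    assert 233472 == 228 * 1024  # a 228 KiB budget, versus bert's 18 KiB above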
2 changes: 1 addition & 1 deletion benchmark/models/falcon/utils/pybudify.py
@@ -381,7 +381,7 @@ def __init__(

        pybuda_arch = {
            "grayskull": pybuda.BackendDevice.Grayskull,
-           "wormhole": pybuda.BackendDevice.Wormhole,
+           # "wormhole": pybuda.BackendDevice.Wormhole,
            "wormhole_b0": pybuda.BackendDevice.Wormhole_B0,
        }[arch]

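
With the "wormhole" entry commented out, the bare dict lookup now raises a plain KeyError for arch="wormhole". A sketch of a friendlier failure mode, assuming only the two remaining entries (not part of this commit):

    supported = {
        "grayskull": pybuda.BackendDevice.Grayskull,
        "wormhole_b0": pybuda.BackendDevice.Wormhole_B0,
    }
    if arch not in supported:
        raise ValueError(f"Unsupported arch {arch!r}; expected one of {sorted(supported)}")
    pybuda_arch = supported[arch]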
44 changes: 35 additions & 9 deletions benchmark/models/flant5/flant5_past_cache_enc_dec.py
@@ -61,28 +61,54 @@ def flant5_past_cache_enc_dec(training: bool, task: str, config: str, microbatch
    import pybuda
    from pybuda.pybudaglobal import TILE_DIM

    # Add PyBUDA configurations
    # ---------------------------------------------------------------------------------------- #
    # flan-T5, START
    # ---------------------------------------------------------------------------------------- #

    compiler_cfg = pybuda.config._get_global_compiler_config()

    if compiler_cfg.balancer_policy == "default":
        compiler_cfg.balancer_policy = "Ribbon"
        os.environ["PYBUDA_RIBBON2"] = "1"

    # These are about to be enabled by default.
    #
    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
    os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
    os.environ["PYBUDA_EXP_APPROX"] = "1"

    # ---------------------------------------------------------------------------------------- #
    # flan-T5, END
    # ---------------------------------------------------------------------------------------- #

    # ---------------------------------------------------------------------------------------- #
    # Generate T5 past cache encoder-decoder, START
    # ---------------------------------------------------------------------------------------- #

    # T5 past cache encoder-decoder overrides (I)
    # Flags
    os.environ["PYBUDA_PAD_OUTPUT_BUFFER"] = "1"
    os.environ["TT_BACKEND_MULTI_THREADED_PUSH"] = "1"
    os.environ["PYBUDA_DISABLE_DYNAMIC_DRAM"] = "1"
    os.environ["PYBUDA_EXTRA_L1_MARGIN"] = "120000"
    os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1"
    os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "26000"
    os.environ["TT_BACKEND_DRAM_POLLING_FREQUENCY"] = "64"
    os.environ["TT_BACKEND_PROFILER"] = "1"
    os.environ["TT_BACKEND_EPOCH_BIN_NUM_SLOTS"] = "64"
    os.environ["PYBUDA_ROTATE_PAST_CACHE_PARAMS"] = "1"

    # Compiler configurations
    compiler_cfg = pybuda.config._get_global_compiler_config()
    compiler_cfg.enable_t_streaming = True
    compiler_cfg.enable_tvm_cpu_fallback = False
    compiler_cfg.default_df_override = pybuda._C.Float16_b
    compiler_cfg.default_dram_parameters = False
    compiler_cfg.input_queues_on_host = True
    compiler_cfg.enable_amp_light()
    compiler_cfg.compile_subgraphs = True
    compiler_cfg.enable_link_past_cache_ios = True

    # ---------------------------------------------------------------------------------------- #
    # Generate T5 past cache encoder-decoder, END
    # ---------------------------------------------------------------------------------------- #
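
The hunk fetches _get_global_compiler_config() twice, once in the flan-T5 block and again under "Compiler configurations". Judging by the name it returns a process-wide global, so the second fetch should be redundant but harmless; a sketch of that assumption:

    cfg_a = pybuda.config._get_global_compiler_config()
    cfg_b = pybuda.config._get_global_compiler_config()
    assert cfg_a is cfg_b  # assumption: a shared global object, not a fresh copy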

    # Set model parameters based on chosen task and model configuration
    if task in ["na", "text_classification", "text_summarization"]:
        if config == "small":
40 changes: 22 additions & 18 deletions benchmark/models/hrnet/hrnet.py
@@ -34,34 +34,30 @@ def hrnet(training: bool, task: str, config: str, microbatch: int, device: str,
    from pybuda._C.backend_api import BackendDevice

    compiler_cfg = pybuda.config._get_global_compiler_config()
    compiler_cfg.enable_auto_transposing_placement = True

    if compiler_cfg.balancer_policy == "default":
        compiler_cfg.balancer_policy = "Ribbon"
        os.environ["PYBUDA_RIBBON2"] = "1"
        os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10"
        os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46"
        os.environ["PYBUDA_LEGACY_KERNEL_BROADCAST"] = "1"

    if data_type != "Bfp8_b":
        os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
        os.environ["PYBUDA_RIBBON2_DISABLE_NON_MATMUL_UTIL"] = "1"
        os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
        os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
        os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
        os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "46"  # removing causes hang #2139
        os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"

    # These are about to be enabled by default.
    #
    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
    os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"

    if data_type == "Fp16_b":
        # Hangs with autotranspose on #2542
        compiler_cfg.enable_auto_transposing_placement = False
        os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10"

    # Manually enable amp light for Ribbon
    if compiler_cfg.balancer_policy == "Ribbon":
        compiler_cfg.enable_amp_light()

    if config == "v2_w64":
        if "TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE" not in os.environ:
            os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{10*1024}"
        available_devices = pybuda.detect_available_devices()
        if available_devices:
            if available_devices[0] == BackendDevice.Grayskull:
                pybuda.config.set_epoch_break("add_618")
                pybuda.config.insert_buffering_nop("add_442", ["add_471"], nop_count=20)

    # Set model parameters based on chosen task and model configuration
    img_res = 224
    target_microbatch = 32
@@ -82,6 +78,14 @@
model_name = "hrnetv2_w48"
elif config == "v2_w64":
model_name = "hrnetv2_w64"
model_name = "hrnetv2_w64"
if data_type == "Bfp8_b":
if "TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE" not in os.environ:
os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{10*1024}"
available_devices = pybuda.detect_available_devices()
if available_devices:
if available_devices[0] == BackendDevice.Grayskull:
pybuda.config._internal_insert_fj_buffering_nop('add_312', ['add_341'], nop_count=2)
else:
raise RuntimeError("Unknown config")

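
The config-to-model-name chain above grows by one elif per variant. A table-driven equivalent, shown only with the names visible in this hunk (the real chain has more branches):

    names = {"v2_w48": "hrnetv2_w48", "v2_w64": "hrnetv2_w64"}  # partial, for illustration
    try:
        model_name = names[config]
    except KeyError:
        raise RuntimeError("Unknown config")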
16 changes: 10 additions & 6 deletions benchmark/models/inception_v4/inception_v4.py
@@ -22,16 +22,20 @@ def inception_v4(training: bool, task: str, config: str, microbatch: int, device

    # Configurations
    compiler_cfg = pybuda.config._get_global_compiler_config()  # load global compiler config object
    compiler_cfg.enable_auto_transposing_placement = True

    if compiler_cfg.balancer_policy == "default":
        compiler_cfg.balancer_policy = "Ribbon"
        os.environ["PYBUDA_RIBBON2"] = "1"

    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    if data_type != "Bfp8_b":
        os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
        os.environ["PYBUDA_OP_MODEL_COMPARE_VERSION"] = "1"
    else:
        os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"

    # These are about to be enabled by default.
    #
    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
    os.environ["PYBUDA_RIBBON2_CONSERVATIVE_OPTIMIZATION_ITERATIONS"] = "10"

    if data_type == "Bfp8_b":
        os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"

    if config == "224":
        model_name = "inception_v4"
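
The data_type strings these scripts branch on ("Fp16_b", "Bfp8_b") presumably mirror pybuda's data formats; an assumed correspondence, for orientation only (the actual dispatch lives elsewhere in benchmark.py):

    ASSUMED_FORMATS = {
        "Fp16_b": pybuda.DataFormat.Float16_b,
        "Bfp8_b": pybuda.DataFormat.Bfp8_b,  # assumption: block-float-8 sibling of Float16_b
    }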
18 changes: 12 additions & 6 deletions benchmark/models/mobilenet_v1/mobilenet_v1.py
@@ -24,20 +24,26 @@ def mobilenetv1(training: bool, task: str, config: str, microbatch: int, device:
        compiler_cfg.balancer_policy = "Ribbon"
        os.environ["PYBUDA_RIBBON2"] = "1"

    os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "8"
    os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"

    # These are about to be enabled by default.
    #
    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    if data_type != "Bfp8_b":
        os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
        os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
        os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
    os.environ["PYBUDA_RIBBON2_CONSERVATIVE_OPTIMIZATION_ITERATIONS"] = "10"

    if data_type == "Fp16_b":
        os.environ["PYBUDA_SUPRESS_T_FACTOR_MM"] = "40"
        os.environ["PYBUDA_TEMP_DISABLE_MODEL_KB_PROLOGUE_BW"] = "1"

    if data_type == "Bfp8_b":
        # tenstorrent/pybuda#2228
        os.environ["PYBUDA_LEGACY_KERNEL_BROADCAST"] = "1"
        os.environ["PYBUDA_FUSE_DF_OVERRIDE"] = "0"
        pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
        pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
        pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b)
        pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
        pybuda.config.configure_mixed_precision(op_type="depthwise", output_df=pybuda.DataFormat.Float16_b, math_fidelity=pybuda.MathFidelity.HiFi2)

    # Set model parameters based on chosen task and model configuration
    if config == "192":
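
The name_regex override above is an ordinary regular expression matched against op names. A quick sanity check of the pattern against a hypothetical op name, assuming standard re.match semantics:

    import re

    assert re.match("input.*add.*", "input_1_add_5") is not None  # input-side add ops match
    assert re.match("input.*add.*", "multiply_7") is None         # unrelated ops do not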
33 changes: 18 additions & 15 deletions benchmark/models/mobilenet_v2/mobilenet_v2.py
@@ -28,28 +28,31 @@ def mobilenetv2(training: bool, task: str, config: str, microbatch: int, device:
    # These are about to be enabled by default.
    #
    os.environ["PYBUDA_TEMP_ENABLE_NEW_FUSED_ESTIMATES"] = "1"
    if data_type != "Bfp8_b":
        os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"
        os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"
        os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
        os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_TEMP_SCALE_SPARSE_ESTIMATE_ARGS"] = "1"
    os.environ["PYBUDA_RIBBON2_CALCULATE_TARGET_CYCLES"] = "1"
    os.environ["PYBUDA_TEMP_ENABLE_NEW_SPARSE_ESTIMATES"] = "1"

    if pybuda.detect_available_devices()[0] != BackendDevice.Grayskull:
        os.environ["PYBUDA_MAXIMIZE_SPARSE_UBLOCK"] = "1"
        os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1"
        os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10"
        os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1"

    if data_type == "Fp16_b":
        os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"
        os.environ["PYBUDA_ENABLE_HOST_INPUT_NOP_BUFFERING"] = "1"

    if data_type == "Bfp8_b":
        pybuda.config.configure_mixed_precision(name_regex="input.*add.*", output_df=pybuda.DataFormat.Float16_b)
        pybuda.config.configure_mixed_precision(op_type="add", output_df=pybuda.DataFormat.Float16_b)
        pybuda.config.configure_mixed_precision(
-           op_type="depthwise",
-           input_df={
-               1: (pybuda.DataFormat.Float16_b, False),
-           },
-           output_df=pybuda.DataFormat.Float16_b,
+           op_type="depthwise",
+           input_df={1: (pybuda.DataFormat.Float16_b, False),},
+           output_df=pybuda.DataFormat.Float16_b,
+           math_fidelity=pybuda.MathFidelity.HiFi2
        )
        pybuda.config.configure_mixed_precision(op_type="multiply", math_fidelity=pybuda.MathFidelity.HiFi2)
        pybuda.config.configure_mixed_precision(op_type="matmul", math_fidelity=pybuda.MathFidelity.HiFi2)

    if pybuda.detect_available_devices()[0] != BackendDevice.Grayskull:
        os.environ["PYBUDA_MAXIMIZE_SPARSE_UBLOCK"] = "1"
        os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1"
        os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10"
        os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1"

    # Set model parameters based on chosen task and model configuration
    if config == "224":
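
Both Grayskull checks in this file index detect_available_devices() directly, so they assume at least one device is attached; the hrnet hunk above guards against an empty list first. The guarded form, as a sketch:

    devices = pybuda.detect_available_devices()
    if devices and devices[0] != BackendDevice.Grayskull:
        os.environ["PYBUDA_MAXIMIZE_SPARSE_UBLOCK"] = "1"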
