Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support device detection with new benchmark suite #13182

Merged
merged 8 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 46 additions & 26 deletions build_tools/benchmarks/common/benchmark_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
from enum import Enum
from typing import Any, Dict, List, Optional, Sequence, Tuple

# A map from CPU ABI to IREE's benchmark target architecture.
CPU_ABI_TO_TARGET_ARCH_MAP = {
from e2e_test_framework.definitions import common_definitions

# A map from CPU ABI to IREE's legacy benchmark target architecture.
pzread marked this conversation as resolved.
Show resolved Hide resolved
# A map from CPU ABI (plus optional "-<microarchitecture>" suffix) to IREE's
# legacy benchmark target architecture. Keys must be fully lowercase: the
# lookup in get_iree_cpu_arch_name builds its key via str.lower() on both the
# ABI and the microarchitecture, so a mixed-case key would never match.
CPU_ABI_TO_LEGACY_TARGET_ARCH_MAP = {
    "arm64-v8a": "cpu-arm64-v8a",
    "x86_64": "cpu-x86_64",
    # Fixed: was "x86_64-cascadeLake" (capital L), unreachable by the
    # lowercased lookup key.
    "x86_64-cascadelake": "cpu-x86_64-cascadelake",
}

# A map from GPU name to IREE's benchmark target architecture.
GPU_NAME_TO_TARGET_ARCH_MAP = {
# A map from GPU name to IREE's legacy benchmark target architecture.
GPU_NAME_TO_LEGACY_TARGET_ARCH_MAP = {
"adreno-640": "gpu-adreno",
"adreno-650": "gpu-adreno",
"adreno-660": "gpu-adreno",
Expand All @@ -36,11 +38,28 @@
"tesla-v100-sxm2-16gb": "gpu-cuda-sm_70",
"nvidia-a100-sxm4-40gb": "gpu-cuda-sm_80",
"nvidia-geforce-rtx-3090": "gpu-cuda-sm_80",
"unknown": "gpu-unknown",
}

# A map of canonical microarchitecture names.
CANONICAL_MICROARCHITECTURE_NAMES = {"CascadeLake", "Zen2"}
# A map from lowercased CPU ABI (plus optional "-<microarchitecture>" suffix)
# to IREE's benchmark target architecture.
CPU_ABI_TO_TARGET_ARCH_MAP = {
    "arm64-v8a": common_definitions.DeviceArchitecture.ARMV8_2_A_GENERIC,
    "x86_64-cascadelake": common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
}

# A map from lowercased GPU name to IREE's benchmark target architecture.
# Entries are grouped by shared architecture with dict.fromkeys; the merged
# result is identical to spelling each key out individually.
GPU_NAME_TO_TARGET_ARCH_MAP = {
    **dict.fromkeys(
        ("adreno-640", "adreno-650", "adreno-660", "adreno-730"),
        common_definitions.DeviceArchitecture.QUALCOMM_ADRENO),
    **dict.fromkeys(
        ("mali-g77", "mali-g78"),
        common_definitions.DeviceArchitecture.ARM_VALHALL),
    "tesla-v100-sxm2-16gb": common_definitions.DeviceArchitecture.CUDA_SM70,
    **dict.fromkeys(
        ("nvidia-a100-sxm4-40gb", "nvidia-geforce-rtx-3090"),
        common_definitions.DeviceArchitecture.CUDA_SM80),
}


@dataclasses.dataclass
Expand Down Expand Up @@ -224,28 +243,29 @@ def __str__(self):
params = ", ".join(params)
return f"{self.platform_type.value} device <{params}>"

def get_iree_cpu_arch_name(self) -> str:
arch = CPU_ABI_TO_TARGET_ARCH_MAP.get(self.cpu_abi.lower())
if not arch:
raise ValueError(f"Unrecognized CPU ABI: '{self.cpu_abi}'; "
"need to update the map")

def get_iree_cpu_arch_name(self,
                           use_legacy_name: bool = False) -> Optional[str]:
  """Returns the IREE benchmark target architecture name for this CPU.

  Builds a lowercase lookup key from the CPU ABI, appending the lowercased
  microarchitecture (e.g. "x86_64-cascadelake") when one is set.

  Args:
    use_legacy_name: if True, resolve against the legacy string-based
      architecture map instead of the DeviceArchitecture-based one.

  Returns:
    The architecture name, or None when the CPU is not in the map.

  Raises:
    ValueError: if the microarchitecture is not a canonical name.
  """
  key = self.cpu_abi.lower()
  uarch = self.cpu_uarch
  if uarch:
    if uarch not in CANONICAL_MICROARCHITECTURE_NAMES:
      raise ValueError(
          f"Unrecognized CPU microarchitecture: '{uarch}'; "
          "need to update the map")
    key = f"{key}-{uarch.lower()}"

  if use_legacy_name:
    return CPU_ABI_TO_LEGACY_TARGET_ARCH_MAP.get(key)

  # TODO(#11076): Return common_definitions.DeviceArchitecture instead after
  # removing the legacy path.
  arch = CPU_ABI_TO_TARGET_ARCH_MAP.get(key)
  return str(arch) if arch is not None else None

arch = f'{arch}-{self.cpu_uarch.lower()}'
def get_iree_gpu_arch_name(self,
                           use_legacy_name: bool = False) -> Optional[str]:
  """Returns the IREE benchmark target architecture name for this GPU.

  Args:
    use_legacy_name: if True, resolve against the legacy string-based
      architecture map instead of the DeviceArchitecture-based one.

  Returns:
    The architecture name, or None when the GPU is not in the map.
  """
  key = self.gpu_name.lower()

  if use_legacy_name:
    return GPU_NAME_TO_LEGACY_TARGET_ARCH_MAP.get(key)

  arch = GPU_NAME_TO_TARGET_ARCH_MAP.get(key)
  return str(arch) if arch is not None else None

def get_detailed_cpu_arch_name(self) -> str:
"""Returns the detailed architecture name."""
Expand Down
22 changes: 18 additions & 4 deletions build_tools/benchmarks/common/benchmark_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,31 @@ def run(self) -> None:
self.config.trace_capture_config.capture_tmp_dir.mkdir(parents=True,
exist_ok=True)

cpu_target_arch = self.device_info.get_iree_cpu_arch_name()
gpu_target_arch = self.device_info.get_iree_gpu_arch_name()
use_legacy_name = self.benchmark_suite.legacy_suite

target_architectures = []
cpu_target_arch = self.device_info.get_iree_cpu_arch_name(use_legacy_name)
if cpu_target_arch is None:
print("WARNING: Detected unsupported CPU architecture in "
f'"{self.device_info}", CPU benchmarking is disabled.')
else:
target_architectures.append(cpu_target_arch)

gpu_target_arch = self.device_info.get_iree_gpu_arch_name(use_legacy_name)
if gpu_target_arch is None:
print("WARNING: Detected unsupported GPU architecture in "
f'"{self.device_info}", GPU benchmarking is disabled.')
else:
target_architectures.append(gpu_target_arch)

drivers, loaders = self.__get_available_drivers_and_loaders()

for category, _ in self.benchmark_suite.list_categories():
benchmark_cases = self.benchmark_suite.filter_benchmarks_for_category(
category=category,
available_drivers=drivers,
available_loaders=loaders,
cpu_target_arch_filter=f"^{cpu_target_arch}$",
gpu_target_arch_filter=f"^{gpu_target_arch}$",
target_architectures=target_architectures,
driver_filter=self.config.driver_filter,
mode_filter=self.config.mode_filter,
model_name_filter=self.config.model_name_filter)
Expand Down
118 changes: 78 additions & 40 deletions build_tools/benchmarks/common/benchmark_driver_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
from common.benchmark_definition import (IREE_DRIVERS_INFOS, DeviceInfo,
PlatformType, BenchmarkLatency,
BenchmarkMemory, BenchmarkMetrics)
from e2e_test_framework.definitions import common_definitions, iree_definitions


class FakeBenchmarkDriver(BenchmarkDriver):

def __init__(self,
*args,
raise_exception_on_case: Optional[str] = None,
raise_exception_on_case: Optional[BenchmarkCase] = None,
**kwargs):
super().__init__(*args, **kwargs)
self.raise_exception_on_case = raise_exception_on_case
Expand All @@ -32,8 +33,7 @@ def __init__(self,
def run_benchmark_case(self, benchmark_case: BenchmarkCase,
benchmark_results_filename: Optional[pathlib.Path],
capture_filename: Optional[pathlib.Path]) -> None:
if (self.raise_exception_on_case is not None and
self.raise_exception_on_case in str(benchmark_case.benchmark_case_dir)):
if self.raise_exception_on_case == benchmark_case:
raise Exception("fake exception")

self.run_benchmark_cases.append(benchmark_case)
Expand Down Expand Up @@ -83,27 +83,74 @@ def setUp(self):

self.device_info = DeviceInfo(platform_type=PlatformType.LINUX,
model="Unknown",
cpu_abi="arm64-v8a",
cpu_uarch=None,
cpu_features=["sha2"],
gpu_name="Mali-G78")

case1 = BenchmarkCase(model_name="DeepNet",
model_tags=[],
bench_mode=["1-thread", "full-inference"],
target_arch="CPU-ARM64-v8A",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu"],
benchmark_case_dir=pathlib.Path("case1"),
benchmark_tool_name="tool")
case2 = BenchmarkCase(model_name="DeepNetv2",
model_tags=["f32"],
bench_mode=["full-inference"],
target_arch="CPU-ARM64-v8A",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
benchmark_case_dir=pathlib.Path("case2"),
benchmark_tool_name="tool")
cpu_abi="x86_64",
cpu_uarch="CascadeLake",
cpu_features=[],
gpu_name="unknown")

model_tflite = common_definitions.Model(
id="tflite",
name="model_tflite",
tags=[],
source_type=common_definitions.ModelSourceType.EXPORTED_TFLITE,
source_url="",
entry_function="predict",
input_types=["1xf32"])
device_spec = common_definitions.DeviceSpec.build(
id="dev",
device_name="test_dev",
architecture=common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
device_parameters=[],
tags=[])
compile_target = iree_definitions.CompileTarget(
target_backend=iree_definitions.TargetBackend.LLVM_CPU,
target_architecture=(
common_definitions.DeviceArchitecture.X86_64_CASCADELAKE),
target_abi=iree_definitions.TargetABI.LINUX_GNU)
gen_config = iree_definitions.ModuleGenerationConfig.build(
imported_model=iree_definitions.ImportedModel.from_model(model_tflite),
compile_config=iree_definitions.CompileConfig.build(
id="comp_a", tags=[], compile_targets=[compile_target]))
exec_config_a = iree_definitions.ModuleExecutionConfig.build(
id="exec_a",
tags=["sync"],
loader=iree_definitions.RuntimeLoader.EMBEDDED_ELF,
driver=iree_definitions.RuntimeDriver.LOCAL_SYNC)
run_config_a = iree_definitions.E2EModelRunConfig.build(
module_generation_config=gen_config,
module_execution_config=exec_config_a,
target_device_spec=device_spec,
input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE)
exec_config_b = iree_definitions.ModuleExecutionConfig.build(
id="exec_b",
tags=["task"],
loader=iree_definitions.RuntimeLoader.EMBEDDED_ELF,
driver=iree_definitions.RuntimeDriver.LOCAL_TASK)
run_config_b = iree_definitions.E2EModelRunConfig.build(
module_generation_config=gen_config,
module_execution_config=exec_config_b,
target_device_spec=device_spec,
input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE)
self.case1 = BenchmarkCase(
model_name="model_tflite",
model_tags=[],
bench_mode=["sync"],
target_arch="x86_64-cascadelake",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
benchmark_tool_name="tool",
run_config=run_config_a)
self.case2 = BenchmarkCase(model_name="model_tflite",
model_tags=[],
bench_mode=["task"],
target_arch="x86_64-cascadelake",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu"],
benchmark_tool_name="tool",
run_config=run_config_b)
self.benchmark_suite = BenchmarkSuite({
pathlib.Path("suite/TFLite"): [case1, case2],
pathlib.Path("suite/TFLite"): [self.case1, self.case2],
})

def tearDown(self) -> None:
Expand All @@ -121,16 +168,12 @@ def test_run(self):
self.assertEqual(
driver.get_benchmark_results().benchmarks[0].metrics.raw_data, {})
self.assertEqual(driver.get_benchmark_result_filenames(), [
self.benchmark_results_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).json",
self.benchmark_results_dir /
"DeepNetv2 [f32] (TFLite) full-inference with IREE-LLVM-CPU-Sync @ Unknown (CPU-ARMv8-A).json"
self.benchmark_results_dir / f"{self.case1.run_config}.json",
self.benchmark_results_dir / f"{self.case2.run_config}.json"
])
self.assertEqual(driver.get_capture_filenames(), [
self.captures_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).tracy",
self.captures_dir /
"DeepNetv2 [f32] (TFLite) full-inference with IREE-LLVM-CPU-Sync @ Unknown (CPU-ARMv8-A).tracy"
self.captures_dir / f"{self.case1.run_config}.tracy",
self.captures_dir / f"{self.case2.run_config}.tracy"
])
self.assertEqual(driver.get_benchmark_errors(), [])

Expand All @@ -149,23 +192,18 @@ def test_run_with_exception_and_keep_going(self):
driver = FakeBenchmarkDriver(self.device_info,
self.config,
self.benchmark_suite,
raise_exception_on_case="case1")
raise_exception_on_case=self.case1)

driver.run()

self.assertEqual(len(driver.get_benchmark_errors()), 1)
self.assertEqual(len(driver.get_benchmark_result_filenames()), 1)

def test_run_with_previous_benchmarks_and_captures(self):
benchmark_filename = (
self.benchmark_results_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).json"
)
benchmark_filename = (self.benchmark_results_dir /
f"{self.case1.run_config}.json")
benchmark_filename.touch()
capture_filename = (
self.captures_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).tracy"
)
capture_filename = self.captures_dir / f"{self.case1.run_config}.tracy"
capture_filename.touch()
config = dataclasses.replace(self.config, continue_from_previous=True)
driver = FakeBenchmarkDriver(device_info=self.device_info,
Expand Down
Loading