Skip to content

Commit

Permalink
Merge 177a3da into 5363ea3
Browse files Browse the repository at this point in the history
  • Loading branch information
pzread authored May 2, 2023
2 parents 5363ea3 + 177a3da commit a3449ae
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 112 deletions.
72 changes: 46 additions & 26 deletions build_tools/benchmarks/common/benchmark_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
from enum import Enum
from typing import Any, Dict, List, Optional, Sequence, Tuple

# A map from CPU ABI to IREE's benchmark target architecture.
CPU_ABI_TO_TARGET_ARCH_MAP = {
from e2e_test_framework.definitions import common_definitions

# A map from a normalized CPU name ("<abi>[-<uarch>]") to IREE's legacy
# benchmark target architecture.
# NOTE: keys must be all-lowercase — lookups use `cpu_abi.lower()` optionally
# suffixed with `cpu_uarch.lower()`, so a mixed-case key such as
# "x86_64-cascadeLake" could never match.
CPU_ABI_TO_LEGACY_TARGET_ARCH_MAP = {
    "arm64-v8a": "cpu-arm64-v8a",
    "x86_64": "cpu-x86_64",
    "x86_64-cascadelake": "cpu-x86_64-cascadelake",
}

# A map from GPU name to IREE's benchmark target architecture.
GPU_NAME_TO_TARGET_ARCH_MAP = {
# A map from GPU name to IREE's legacy benchmark target architecture.
GPU_NAME_TO_LEGACY_TARGET_ARCH_MAP = {
"adreno-640": "gpu-adreno",
"adreno-650": "gpu-adreno",
"adreno-660": "gpu-adreno",
Expand All @@ -36,11 +38,28 @@
"tesla-v100-sxm2-16gb": "gpu-cuda-sm_70",
"nvidia-a100-sxm4-40gb": "gpu-cuda-sm_80",
"nvidia-geforce-rtx-3090": "gpu-cuda-sm_80",
"unknown": "gpu-unknown",
}

# A map of canonical microarchitecture names.
CANONICAL_MICROARCHITECTURE_NAMES = {"CascadeLake", "Zen2"}
# Maps a normalized, lowercase CPU name ("<abi>[-<uarch>]") to the benchmark
# target architecture defined by the e2e test framework.
CPU_ABI_TO_TARGET_ARCH_MAP = {
    "arm64-v8a": common_definitions.DeviceArchitecture.ARMV8_2_A_GENERIC,
    "x86_64-cascadelake": common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
}

# Maps a lowercase GPU product name to the benchmark target architecture
# defined by the e2e test framework.
GPU_NAME_TO_TARGET_ARCH_MAP = {
    # Qualcomm Adreno series.
    "adreno-640": common_definitions.DeviceArchitecture.QUALCOMM_ADRENO,
    "adreno-650": common_definitions.DeviceArchitecture.QUALCOMM_ADRENO,
    "adreno-660": common_definitions.DeviceArchitecture.QUALCOMM_ADRENO,
    "adreno-730": common_definitions.DeviceArchitecture.QUALCOMM_ADRENO,
    # ARM Mali (Valhall) series.
    "mali-g77": common_definitions.DeviceArchitecture.ARM_VALHALL,
    "mali-g78": common_definitions.DeviceArchitecture.ARM_VALHALL,
    # NVIDIA CUDA devices, keyed by full product name.
    "tesla-v100-sxm2-16gb": common_definitions.DeviceArchitecture.CUDA_SM70,
    "nvidia-a100-sxm4-40gb": common_definitions.DeviceArchitecture.CUDA_SM80,
    "nvidia-geforce-rtx-3090": common_definitions.DeviceArchitecture.CUDA_SM80,
}


@dataclasses.dataclass
Expand Down Expand Up @@ -224,28 +243,29 @@ def __str__(self):
params = ", ".join(params)
return f"{self.platform_type.value} device <{params}>"

def get_iree_cpu_arch_name(self,
                           use_legacy_name: bool = False) -> Optional[str]:
  """Returns IREE's benchmark target architecture name for this CPU.

  The lookup key is the lowercased CPU ABI, suffixed with the lowercased
  microarchitecture ("<abi>-<uarch>") when one is set.

  Args:
    use_legacy_name: if True, return the legacy target architecture string
      instead of the e2e-framework architecture name.

  Returns:
    The architecture name, or None when the CPU is not in the map (the
    caller treats None as "CPU benchmarking unsupported on this device").

  Raises:
    ValueError: if cpu_uarch is not a canonical microarchitecture name.
  """
  name = self.cpu_abi.lower()
  if self.cpu_uarch:
    # Validate against the canonical spelling before lowercasing, so typos
    # in device configs fail loudly instead of silently missing the map.
    if self.cpu_uarch not in CANONICAL_MICROARCHITECTURE_NAMES:
      raise ValueError(
          f"Unrecognized CPU microarchitecture: '{self.cpu_uarch}'; "
          "need to update the map")
    name += f"-{self.cpu_uarch.lower()}"

  if use_legacy_name:
    return CPU_ABI_TO_LEGACY_TARGET_ARCH_MAP.get(name)

  arch = CPU_ABI_TO_TARGET_ARCH_MAP.get(name)
  # TODO(#11076): Return common_definitions.DeviceArchitecture instead after
  # removing the legacy path.
  return None if arch is None else str(arch)
def get_iree_gpu_arch_name(self,
                           use_legacy_name: bool = False) -> Optional[str]:
  """Returns IREE's benchmark target architecture name for this GPU.

  The lookup key is the lowercased GPU product name.

  Args:
    use_legacy_name: if True, return the legacy target architecture string
      instead of the e2e-framework architecture name.

  Returns:
    The architecture name, or None when the GPU is not in the map (the
    caller treats None as "GPU benchmarking unsupported on this device").
  """
  name = self.gpu_name.lower()

  if use_legacy_name:
    return GPU_NAME_TO_LEGACY_TARGET_ARCH_MAP.get(name)

  arch = GPU_NAME_TO_TARGET_ARCH_MAP.get(name)
  # TODO(#11076): Return common_definitions.DeviceArchitecture instead after
  # removing the legacy path.
  return None if arch is None else str(arch)

def get_detailed_cpu_arch_name(self) -> str:
"""Returns the detailed architecture name."""
Expand Down
22 changes: 18 additions & 4 deletions build_tools/benchmarks/common/benchmark_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,31 @@ def run(self) -> None:
self.config.trace_capture_config.capture_tmp_dir.mkdir(parents=True,
exist_ok=True)

cpu_target_arch = self.device_info.get_iree_cpu_arch_name()
gpu_target_arch = self.device_info.get_iree_gpu_arch_name()
use_legacy_name = self.benchmark_suite.legacy_suite

target_architectures = []
cpu_target_arch = self.device_info.get_iree_cpu_arch_name(use_legacy_name)
if cpu_target_arch is None:
print("WARNING: Detected unsupported CPU architecture in "
f'"{self.device_info}", CPU benchmarking is disabled.')
else:
target_architectures.append(cpu_target_arch)

gpu_target_arch = self.device_info.get_iree_gpu_arch_name(use_legacy_name)
if gpu_target_arch is None:
print("WARNING: Detected unsupported GPU architecture in "
f'"{self.device_info}", GPU benchmarking is disabled.')
else:
target_architectures.append(gpu_target_arch)

drivers, loaders = self.__get_available_drivers_and_loaders()

for category, _ in self.benchmark_suite.list_categories():
benchmark_cases = self.benchmark_suite.filter_benchmarks_for_category(
category=category,
available_drivers=drivers,
available_loaders=loaders,
cpu_target_arch_filter=f"^{cpu_target_arch}$",
gpu_target_arch_filter=f"^{gpu_target_arch}$",
target_architectures=target_architectures,
driver_filter=self.config.driver_filter,
mode_filter=self.config.mode_filter,
model_name_filter=self.config.model_name_filter)
Expand Down
118 changes: 78 additions & 40 deletions build_tools/benchmarks/common/benchmark_driver_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
from common.benchmark_definition import (IREE_DRIVERS_INFOS, DeviceInfo,
PlatformType, BenchmarkLatency,
BenchmarkMemory, BenchmarkMetrics)
from e2e_test_framework.definitions import common_definitions, iree_definitions


class FakeBenchmarkDriver(BenchmarkDriver):

def __init__(self,
*args,
raise_exception_on_case: Optional[str] = None,
raise_exception_on_case: Optional[BenchmarkCase] = None,
**kwargs):
super().__init__(*args, **kwargs)
self.raise_exception_on_case = raise_exception_on_case
Expand All @@ -32,8 +33,7 @@ def __init__(self,
def run_benchmark_case(self, benchmark_case: BenchmarkCase,
                       benchmark_results_filename: Optional[pathlib.Path],
                       capture_filename: Optional[pathlib.Path]) -> None:
  """Fake implementation that records the case instead of running it.

  Raises a fake exception when `benchmark_case` equals the case configured
  via `raise_exception_on_case`, letting tests exercise error handling.
  """
  if self.raise_exception_on_case == benchmark_case:
    raise Exception("fake exception")

  self.run_benchmark_cases.append(benchmark_case)
Expand Down Expand Up @@ -83,27 +83,74 @@ def setUp(self):

self.device_info = DeviceInfo(platform_type=PlatformType.LINUX,
model="Unknown",
cpu_abi="arm64-v8a",
cpu_uarch=None,
cpu_features=["sha2"],
gpu_name="Mali-G78")

case1 = BenchmarkCase(model_name="DeepNet",
model_tags=[],
bench_mode=["1-thread", "full-inference"],
target_arch="CPU-ARM64-v8A",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu"],
benchmark_case_dir=pathlib.Path("case1"),
benchmark_tool_name="tool")
case2 = BenchmarkCase(model_name="DeepNetv2",
model_tags=["f32"],
bench_mode=["full-inference"],
target_arch="CPU-ARM64-v8A",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
benchmark_case_dir=pathlib.Path("case2"),
benchmark_tool_name="tool")
cpu_abi="x86_64",
cpu_uarch="CascadeLake",
cpu_features=[],
gpu_name="unknown")

model_tflite = common_definitions.Model(
id="tflite",
name="model_tflite",
tags=[],
source_type=common_definitions.ModelSourceType.EXPORTED_TFLITE,
source_url="",
entry_function="predict",
input_types=["1xf32"])
device_spec = common_definitions.DeviceSpec.build(
id="dev",
device_name="test_dev",
architecture=common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
device_parameters=[],
tags=[])
compile_target = iree_definitions.CompileTarget(
target_backend=iree_definitions.TargetBackend.LLVM_CPU,
target_architecture=(
common_definitions.DeviceArchitecture.X86_64_CASCADELAKE),
target_abi=iree_definitions.TargetABI.LINUX_GNU)
gen_config = iree_definitions.ModuleGenerationConfig.build(
imported_model=iree_definitions.ImportedModel.from_model(model_tflite),
compile_config=iree_definitions.CompileConfig.build(
id="comp_a", tags=[], compile_targets=[compile_target]))
exec_config_a = iree_definitions.ModuleExecutionConfig.build(
id="exec_a",
tags=["sync"],
loader=iree_definitions.RuntimeLoader.EMBEDDED_ELF,
driver=iree_definitions.RuntimeDriver.LOCAL_SYNC)
run_config_a = iree_definitions.E2EModelRunConfig.build(
module_generation_config=gen_config,
module_execution_config=exec_config_a,
target_device_spec=device_spec,
input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE)
exec_config_b = iree_definitions.ModuleExecutionConfig.build(
id="exec_b",
tags=["task"],
loader=iree_definitions.RuntimeLoader.EMBEDDED_ELF,
driver=iree_definitions.RuntimeDriver.LOCAL_TASK)
run_config_b = iree_definitions.E2EModelRunConfig.build(
module_generation_config=gen_config,
module_execution_config=exec_config_b,
target_device_spec=device_spec,
input_data=common_definitions.ZEROS_MODEL_INPUT_DATA,
tool=iree_definitions.E2EModelRunTool.IREE_BENCHMARK_MODULE)
self.case1 = BenchmarkCase(
model_name="model_tflite",
model_tags=[],
bench_mode=["sync"],
target_arch="x86_64-cascadelake",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu-sync"],
benchmark_tool_name="tool",
run_config=run_config_a)
self.case2 = BenchmarkCase(model_name="model_tflite",
model_tags=[],
bench_mode=["task"],
target_arch="x86_64-cascadelake",
driver_info=IREE_DRIVERS_INFOS["iree-llvm-cpu"],
benchmark_tool_name="tool",
run_config=run_config_b)
self.benchmark_suite = BenchmarkSuite({
pathlib.Path("suite/TFLite"): [case1, case2],
pathlib.Path("suite/TFLite"): [self.case1, self.case2],
})

def tearDown(self) -> None:
Expand All @@ -121,16 +168,12 @@ def test_run(self):
self.assertEqual(
driver.get_benchmark_results().benchmarks[0].metrics.raw_data, {})
self.assertEqual(driver.get_benchmark_result_filenames(), [
self.benchmark_results_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).json",
self.benchmark_results_dir /
"DeepNetv2 [f32] (TFLite) full-inference with IREE-LLVM-CPU-Sync @ Unknown (CPU-ARMv8-A).json"
self.benchmark_results_dir / f"{self.case1.run_config}.json",
self.benchmark_results_dir / f"{self.case2.run_config}.json"
])
self.assertEqual(driver.get_capture_filenames(), [
self.captures_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).tracy",
self.captures_dir /
"DeepNetv2 [f32] (TFLite) full-inference with IREE-LLVM-CPU-Sync @ Unknown (CPU-ARMv8-A).tracy"
self.captures_dir / f"{self.case1.run_config}.tracy",
self.captures_dir / f"{self.case2.run_config}.tracy"
])
self.assertEqual(driver.get_benchmark_errors(), [])

Expand All @@ -149,23 +192,18 @@ def test_run_with_exception_and_keep_going(self):
driver = FakeBenchmarkDriver(self.device_info,
self.config,
self.benchmark_suite,
raise_exception_on_case="case1")
raise_exception_on_case=self.case1)

driver.run()

self.assertEqual(len(driver.get_benchmark_errors()), 1)
self.assertEqual(len(driver.get_benchmark_result_filenames()), 1)

def test_run_with_previous_benchmarks_and_captures(self):
benchmark_filename = (
self.benchmark_results_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).json"
)
benchmark_filename = (self.benchmark_results_dir /
f"{self.case1.run_config}.json")
benchmark_filename.touch()
capture_filename = (
self.captures_dir /
"DeepNet (TFLite) 1-thread,full-inference with IREE-LLVM-CPU @ Unknown (CPU-ARMv8-A).tracy"
)
capture_filename = self.captures_dir / f"{self.case1.run_config}.tracy"
capture_filename.touch()
config = dataclasses.replace(self.config, continue_from_previous=True)
driver = FakeBenchmarkDriver(device_info=self.device_info,
Expand Down
Loading

0 comments on commit a3449ae

Please sign in to comment.