diff --git a/docs/contribute.md b/docs/contribute.md
index d1e88dc..fa6596a 100644
--- a/docs/contribute.md
+++ b/docs/contribute.md
@@ -87,7 +87,7 @@ To add a runtime to a plugin:
     - `"RuntimeClass": `, where `` is a unique name for a Python class that inherits `BaseRT` and implements the runtime.
       - For example, `"RuntimeClass": ExampleRT` implements the `example` runtime.
       - The interface for the runtime class is defined in [Runtime Class](#runtime-class) below.
-    - (Optional) `"status_stats": List[str]`: a list of keys from the build stats that should be printed out at the end of benchmarking in the CLI's `Status` output. These keys, and corresponding values, must be set in the runtime class using `self.stats.add_build_stat(key, value)`.
+    - (Optional) `"status_stats": List[str]`: a list of keys from the build stats that should be printed out at the end of benchmarking in the CLI's `Status` output. These keys, and corresponding values, must be set in the runtime class using `self.stats.save_model_eval_stat(key, value)`.
     - (Optional) `"requirement_check": Callable`: a callable that runs before each benchmark. This may be used to check whether the device selected is available and functional before each benchmarking run. Exceptions raised during this callable will halt the benchmark of all selected files.
 
 1. Populate the package with the following files (see [Plugin Directory Layout](#plugin-directory-layout)):
diff --git a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py
index 7846a2a..2dc117d 100644
--- a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py
+++ b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py
@@ -51,8 +51,8 @@ def benchmark(self) -> MeasuredPerformance:
 
         # Assign values to the stats that will be printed
         # out by the CLI when status is reported
-        self.stats.add_build_stat("magic_perf_points", 42)
-        self.stats.add_build_stat("super_runtime_points", 100)
+        self.stats.save_model_eval_stat("magic_perf_points", 42)
+        self.stats.save_model_eval_stat("super_runtime_points", 100)
 
         return MeasuredPerformance(
             mean_latency=self.mean_latency,
diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py
index 3c213bb..cebf799 100644
--- a/src/turnkeyml/analyze/script.py
+++ b/src/turnkeyml/analyze/script.py
@@ -152,40 +152,40 @@ def explore_invocation(
     invocation_info.stats_keys = []
 
     # Create an ID for the build stats by combining the device and runtime.
-    # We don't need more info in the stats_id because changes to benchmark_model()
+    # We don't need more info in the evaluation_id because changes to benchmark_model()
     # arguments (e.g., sequence) will trigger a rebuild, which is intended to replace the
     # build stats so long as the device and runtime have not changed.
-    stats_id = f"{tracer_args.device}_{selected_runtime}"
+    evaluation_id = f"{tracer_args.device}_{selected_runtime}"
 
     stats = fs.Stats(
         tracer_args.cache_dir,
         build_name,
-        stats_id,
+        evaluation_id,
     )
     invocation_info.stats = stats
 
     # Stats that apply to the model, regardless of build
-    stats.save_stat(
+    stats.save_model_stat(
         fs.Keys.HASH,
         model_info.hash,
     )
-    stats.save_stat(
+    stats.save_model_stat(
         fs.Keys.MODEL_NAME,
         tracer_args.script_name,
     )
-    stats.save_stat(
+    stats.save_model_stat(
         fs.Keys.PARAMETERS,
         model_info.params,
     )
 
     if model_info.model_type != build.ModelType.ONNX_FILE:
-        stats.save_stat(fs.Keys.CLASS, type(model_info.model).__name__)
+        stats.save_model_stat(fs.Keys.CLASS, type(model_info.model).__name__)
 
     if fs.Keys.AUTHOR in tracer_args.labels:
-        stats.save_stat(fs.Keys.AUTHOR, tracer_args.labels[fs.Keys.AUTHOR][0])
+        stats.save_model_stat(fs.Keys.AUTHOR, tracer_args.labels[fs.Keys.AUTHOR][0])
 
     if fs.Keys.TASK in tracer_args.labels:
-        stats.save_stat(fs.Keys.TASK, tracer_args.labels[fs.Keys.TASK][0])
+        stats.save_model_stat(fs.Keys.TASK, tracer_args.labels[fs.Keys.TASK][0])
 
     # Save all of the lables in one place
-    stats.save_stat(fs.Keys.LABELS, tracer_args.labels)
+    stats.save_model_stat(fs.Keys.LABELS, tracer_args.labels)
 
     # If the input script is a built-in TurnkeyML model, make a note of
     # which one
@@ -203,18 +203,18 @@ def explore_invocation(
             fs.MODELS_DIR,
             f"https://github.com/onnx/turnkeyml/tree/{git_hash}/models",
         ).replace("\\", "/")
-        stats.save_stat(fs.Keys.MODEL_SCRIPT, relative_path)
+        stats.save_model_stat(fs.Keys.MODEL_SCRIPT, relative_path)
 
     # Build-specific stats
-    stats.add_build_stat(
+    stats.save_model_eval_stat(
         fs.Keys.DEVICE_TYPE,
         tracer_args.device,
     )
-    stats.add_build_stat(
+    stats.save_model_eval_stat(
         fs.Keys.RUNTIME,
         selected_runtime,
     )
-    stats.add_build_stat(
+    stats.save_model_eval_stat(
         fs.Keys.ITERATIONS,
         tracer_args.iterations,
     )
@@ -233,12 +233,14 @@ def explore_invocation(
             # we will try to catch the exception and note it in the stats.
             # If a concluded build still has a status of "running", this means
             # there was an uncaught exception.
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.RUNNING)
+            stats.save_model_eval_stat(
+                fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.RUNNING
+            )
 
             perf = benchmark_model(
                 model_info.model,
                 inputs,
-                stats_id=stats_id,
+                evaluation_id=evaluation_id,
                 device=tracer_args.device,
                 runtime=selected_runtime,
                 build_name=build_name,
@@ -263,7 +265,7 @@ def explore_invocation(
             invocation_info.status_message = f"Build Error: {e}"
             invocation_info.status_message_color = printing.Colors.WARNING
 
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
+            stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
 
             _store_traceback(invocation_info)
 
@@ -275,14 +277,14 @@ def explore_invocation(
             )
             invocation_info.status_message_color = printing.Colors.WARNING
 
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.KILLED)
+            stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.KILLED)
 
         except exp.ArgError as e:
            # ArgError indicates that some argument to benchmark_model() was
            # illegal. In that case we want to halt execution so that users can
            # fix their arguments.
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
+            stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
 
             raise e
 
@@ -290,7 +292,7 @@ def explore_invocation(
             invocation_info.status_message = f"Error: {e}."
             invocation_info.status_message_color = printing.Colors.WARNING
 
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
+            stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
 
             _store_traceback(invocation_info)
 
@@ -300,19 +302,21 @@ def explore_invocation(
             invocation_info.status_message = f"Unknown turnkey error: {e}"
             invocation_info.status_message_color = printing.Colors.WARNING
 
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
+            stats.save_model_eval_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.FAILED)
 
             _store_traceback(invocation_info)
         else:
             # If there was no exception then we consider the build to be a success
-            stats.add_build_stat(fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.SUCCESSFUL)
+            stats.save_model_eval_stat(
+                fs.Keys.BENCHMARK_STATUS, fs.BenchmarkStatus.SUCCESSFUL
+            )
 
         finally:
             # Ensure that stdout/stderr is not being forwarded before updating status
             util.stop_logger_forward()
 
     system_info = build.get_system_info()
-    stats.save_stat(
+    stats.save_model_stat(
         fs.Keys.SYSTEM_INFO,
         system_info,
     )
@@ -324,11 +328,11 @@ def explore_invocation(
 
     # ONNX stats that we want to save into the build's turnkey_stats.yaml file
     # so that they can be easily accessed by the report command later
-    if fs.Keys.ONNX_FILE in stats.build_stats.keys():
+    if fs.Keys.ONNX_FILE in stats.evaluation_stats.keys():
         # Just in case the ONNX file was generated on a different machine:
         # strip the state's cache dir, then prepend the current cache dir
         final_onnx_file = fs.rebase_cache_dir(
-            stats.build_stats[fs.Keys.ONNX_FILE],
+            stats.evaluation_stats[fs.Keys.ONNX_FILE],
             build_name,
             tracer_args.cache_dir,
         )
@@ -337,22 +341,22 @@ def explore_invocation(
         onnx_model_info = util.populate_onnx_model_info(final_onnx_file)
         onnx_input_dimensions = util.onnx_input_dimensions(final_onnx_file)
 
-        stats.save_stat(
+        stats.save_model_stat(
             fs.Keys.ONNX_OPS_COUNTER,
             onnx_ops_counter,
         )
-        stats.save_stat(
+        stats.save_model_stat(
             fs.Keys.ONNX_MODEL_INFO,
             onnx_model_info,
         )
-        stats.save_stat(
+        stats.save_model_stat(
             fs.Keys.ONNX_INPUT_DIMENSIONS,
             onnx_input_dimensions,
         )
 
     if perf:
         for key, value in vars(perf).items():
-            stats.add_build_stat(
+            stats.save_model_eval_stat(
                 key=key,
                 value=value,
             )
diff --git a/src/turnkeyml/analyze/status.py b/src/turnkeyml/analyze/status.py
index c94542e..fae91f9 100644
--- a/src/turnkeyml/analyze/status.py
+++ b/src/turnkeyml/analyze/status.py
@@ -198,7 +198,7 @@ def print_invocation(
             if unique_invocation.stats_keys is not None:
                 for key in unique_invocation.stats_keys:
                     nice_key = _pretty_print_key(key)
-                    value = unique_invocation.stats.build_stats[key]
+                    value = unique_invocation.stats.evaluation_stats[key]
                     printing.logn(f"{ident}\t\t\t{nice_key}:\t{value}")
             print()
         else:
diff --git a/src/turnkeyml/build/export.py b/src/turnkeyml/build/export.py
index 2f4bc88..6d32f05 100644
--- a/src/turnkeyml/build/export.py
+++ b/src/turnkeyml/build/export.py
@@ -181,8 +181,10 @@ def fire(self, state: build.State):
         if check_model(output_path, success_msg, fail_msg):
             state.intermediate_results = [output_path]
 
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
+            stats = fs.Stats(
+                state.cache_dir, state.config.build_name, state.evaluation_id
+            )
+            stats.save_model_eval_stat(
                 fs.Keys.ONNX_FILE,
                 output_path,
             )
@@ -307,8 +309,10 @@ def fire(self, state: build.State):
         if check_model(output_path, success_msg, fail_msg):
             state.intermediate_results = [output_path]
 
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
+            stats = fs.Stats(
+                state.cache_dir, state.config.build_name, state.evaluation_id
+            )
+            stats.save_model_eval_stat(
                 fs.Keys.ONNX_FILE,
                 output_path,
             )
@@ -428,8 +432,10 @@ def fire(self, state: build.State):
         if check_model(output_path, success_msg, fail_msg):
             state.intermediate_results = [output_path]
 
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
+            stats = fs.Stats(
+                state.cache_dir, state.config.build_name, state.evaluation_id
+            )
+            stats.save_model_eval_stat(
                 fs.Keys.ONNX_FILE,
                 output_path,
             )
@@ -492,8 +498,10 @@ def fire(self, state: build.State):
         if check_model(output_path, success_msg, fail_msg):
             state.intermediate_results = [output_path]
 
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
+            stats = fs.Stats(
+                state.cache_dir, state.config.build_name, state.evaluation_id
+            )
+            stats.save_model_eval_stat(
                 fs.Keys.ONNX_FILE,
                 output_path,
             )
@@ -596,8 +604,10 @@ def fire(self, state: build.State):
         if check_model(output_path, success_msg, fail_msg):
             state.intermediate_results = [output_path]
 
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
+            stats = fs.Stats(
+                state.cache_dir, state.config.build_name, state.evaluation_id
+            )
+            stats.save_model_eval_stat(
                 fs.Keys.ONNX_FILE,
                 output_path,
             )
diff --git a/src/turnkeyml/build/hummingbird.py b/src/turnkeyml/build/hummingbird.py
index 21d5f1a..5540482 100644
--- a/src/turnkeyml/build/hummingbird.py
+++ b/src/turnkeyml/build/hummingbird.py
@@ -217,9 +217,8 @@ def fire(self, state: build.State):
             np.save(state.original_inputs_file, state.inputs)
         state.intermediate_results = [output_path]
 
-        stats = fs.Stats(state.cache_dir, state.config.build_name)
-        stats.add_sub_stat(
-            state.stats_id,
+        stats = fs.Stats(state.cache_dir, state.config.build_name, state.evaluation_id)
+        stats.save_model_eval_stat(
             fs.Keys.ONNX_FILE,
             output_path,
         )
diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py
index add22ec..11184a1 100644
--- a/src/turnkeyml/build/ignition.py
+++ b/src/turnkeyml/build/ignition.py
@@ -250,7 +250,7 @@ def _rebuild_if_needed(
 
 def load_or_make_state(
     config: build.Config,
-    stats_id: str,
+    evaluation_id: str,
     cache_dir: str,
     rebuild: str,
     model_type: build.ModelType,
@@ -274,7 +274,7 @@ def load_or_make_state(
         "inputs": inputs,
         "monitor": monitor,
         "rebuild": rebuild,
-        "stats_id": stats_id,
+        "evaluation_id": evaluation_id,
         "cache_dir": cache_dir,
         "config": config,
         "model_type": model_type,
diff --git a/src/turnkeyml/build/stage.py b/src/turnkeyml/build/stage.py
index cd2ec53..0267995 100644
--- a/src/turnkeyml/build/stage.py
+++ b/src/turnkeyml/build/stage.py
@@ -273,8 +273,8 @@ def launch(self, state: build.State) -> build.State:
             raise exp.Error(msg)
 
        # Collect telemetry for the build
-        stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-        stats.add_build_stat(
+        stats = fs.Stats(state.cache_dir, state.config.build_name, state.evaluation_id)
+        stats.save_model_eval_stat(
             fs.Keys.ALL_BUILD_STAGES,
             self.get_names(),
         )
@@ -292,7 +292,7 @@ def launch(self, state: build.State) -> build.State:
 
                 # Collect telemetry about the stage
                 execution_time = time.time() - start_time
-                stats.add_build_sub_stat(
+                stats.save_model_eval_sub_stat(
                     parent_key=fs.Keys.COMPLETED_BUILD_STAGES,
                     key=stage.unique_name,
                     value=execution_time,
diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py
index 7322a15..349044e 100644
--- a/src/turnkeyml/build_api.py
+++ b/src/turnkeyml/build_api.py
@@ -11,7 +11,7 @@ def build_model(
     model: build.UnionValidModelInstanceTypes = None,
     inputs: Optional[Dict[str, Any]] = None,
     build_name: Optional[str] = None,
-    stats_id: Optional[str] = "build",
+    evaluation_id: Optional[str] = "build",
     cache_dir: str = filesystem.DEFAULT_CACHE_DIR,
     monitor: Optional[bool] = None,
     rebuild: Optional[str] = None,
@@ -30,7 +30,7 @@ def build_model(
         build_name: Unique name for the model that will be used to store the
             ONNX file and build state on disk. Defaults to the name of the file that
             calls build_model().
-        stats_id: Unique name for build statistics that should persist across multiple
+        evaluation_id: Unique name for evaluation statistics that should persist across multiple
             builds of the same model.
         cache_dir: Directory to use as the cache for this build. Output files
             from this build will be stored at cache_dir/build_name/
@@ -94,7 +94,7 @@ def build_model(
     # Get the state of the model from the cache if a valid build is available
     state = ignition.load_or_make_state(
         config=config,
-        stats_id=stats_id,
+        evaluation_id=evaluation_id,
         cache_dir=parsed_cache_dir,
         rebuild=rebuild or build.DEFAULT_REBUILD_POLICY,
         model_type=model_type,
diff --git a/src/turnkeyml/cli/report.py b/src/turnkeyml/cli/report.py
index 6f778f7..ec49a9f 100644
--- a/src/turnkeyml/cli/report.py
+++ b/src/turnkeyml/cli/report.py
@@ -47,7 +47,7 @@ def summary_spreadsheets(args) -> None:
     Path(report_dir).mkdir(parents=True, exist_ok=True)
 
     report: List[Dict] = []
-    all_build_stats = []
+    all_evaluation_stats = []
 
     # Add results from all user-provided cache folders
     for cache_dir in cache_dirs:
@@ -68,13 +68,13 @@ def summary_spreadsheets(args) -> None:
                    model_stats = yaml.load(stream, Loader=yaml.FullLoader)
 
                    # create a separate dict for each build
-                    for build in model_stats[fs.Keys.BUILDS].values():
-                        build_stats = {}
+                    for build in model_stats[fs.Keys.EVALUATIONS].values():
+                        evaluation_stats = {}
 
                        # Copy all of the stats for the model that are common across builds
                        for key, value in model_stats.items():
-                            if key != fs.Keys.BUILDS:
-                                build_stats[key] = value
+                            if key != fs.Keys.EVALUATIONS:
+                                evaluation_stats[key] = value
 
                        # Copy the build-specific stats
                        for key, value in build.items():
@@ -82,7 +82,7 @@ def summary_spreadsheets(args) -> None:
                            # to make analysis easier
                            if key == fs.Keys.COMPLETED_BUILD_STAGES:
                                for subkey, subvalue in value.items():
-                                    build_stats[subkey] = subvalue
+                                    evaluation_stats[subkey] = subvalue
 
                            # If a build is still marked as "running" at reporting time, it
                            # must have been killed by a time out, out-of-memory (OOM), or some
@@ -93,30 +93,30 @@ def summary_spreadsheets(args) -> None:
                            ):
                                value = fs.BenchmarkStatus.KILLED
 
-                            build_stats[key] = value
+                            evaluation_stats[key] = value
 
-                        all_build_stats.append(build_stats)
+                        all_evaluation_stats.append(evaluation_stats)
            except yaml.scanner.ScannerError:
                continue
 
     # Scan the build stats to determine the set of columns for the CSV file.
     # The CSV will have one column for every key in any build stats dict.
     column_headers = []
-    for build_stats in all_build_stats:
+    for evaluation_stats in all_evaluation_stats:
         # Add any key that isn't already in column_headers
-        for header in build_stats.keys():
+        for header in evaluation_stats.keys():
             if header not in column_headers:
                 column_headers.append(header)
 
     # Add each build to the report
-    for build_stats in all_build_stats:
+    for evaluation_stats in all_evaluation_stats:
         # Start with a dictionary where all of the values are "-". If a build
         # has a value for each key we will fill it in, and otherwise the "-"
         # will indicate that no value was available
         result = {k: "-" for k in column_headers}
 
         for key in column_headers:
-            result[key] = _good_get(build_stats, key)
+            result[key] = _good_get(evaluation_stats, key)
 
         report.append(result)
 
@@ -133,13 +133,13 @@ def summary_spreadsheets(args) -> None:
 
     # Save the unique errors and counts to a file
     errors = []
-    for build_stats in all_build_stats:
+    for evaluation_stats in all_evaluation_stats:
         if (
-            "compilation_error" in build_stats.keys()
-            and "compilation_error_id" in build_stats.keys()
+            "compilation_error" in evaluation_stats.keys()
+            and "compilation_error_id" in evaluation_stats.keys()
         ):
-            error = build_stats["compilation_error"]
-            id = build_stats["compilation_error_id"]
+            error = evaluation_stats["compilation_error"]
+            id = evaluation_stats["compilation_error_id"]
             if id != "":
                 unique_error = True
                 for reported_error in errors:
@@ -148,13 +148,13 @@ def summary_spreadsheets(args) -> None:
                        reported_error["count"] = reported_error["count"] + 1
                        reported_error["models_impacted"] = reported_error[
                            "models_impacted"
-                        ] + [build_stats["model_name"]]
+                        ] + [evaluation_stats["model_name"]]
 
                if unique_error:
                    reported_error = {
                        "id": id,
                        "count": 1,
-                        "models_impacted": [build_stats["model_name"]],
+                        "models_impacted": [evaluation_stats["model_name"]],
                        "example": error,
                    }
                    errors.append(reported_error)
diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py
index 9f8ac38..cf76891 100644
--- a/src/turnkeyml/common/build.py
+++ b/src/turnkeyml/common/build.py
@@ -221,7 +221,7 @@ class State:
     monitor: bool = False
     rebuild: str = ""
     cache_dir: str = ""
-    stats_id: str = ""
+    evaluation_id: str = ""
 
     # User-provided args that will not be saved as part of state.yaml
     model: UnionValidModelInstanceTypes = None
diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py
index 2767d15..961cb6b 100644
--- a/src/turnkeyml/common/filesystem.py
+++ b/src/turnkeyml/common/filesystem.py
@@ -331,8 +331,8 @@ class Keys:
     DEVICE_TYPE = "device_type"
     # Name of the model
     MODEL_NAME = "model_name"
-    # References the per-build stats section
-    BUILDS = "builds"
+    # References the per-evaluation stats section
+    EVALUATIONS = "evaluations"
     # Catch-all for storing a file's labels
     LABELS = "labels"
     # Author of the model
@@ -359,15 +359,15 @@ class BenchmarkStatus:
 
 
 class Stats:
-    def __init__(self, cache_dir: str, build_name: str, stats_id: str = None):
+    def __init__(self, cache_dir: str, build_name: str, evaluation_id: str = None):
         output_dir = build.output_dir(cache_dir, build_name)
         self.file = os.path.join(output_dir, "turnkey_stats.yaml")
-        self.stats_id = stats_id
+        self.evaluation_id = evaluation_id
         os.makedirs(output_dir, exist_ok=True)
 
         if not os.path.exists(self.file):
-            initial = {Keys.BUILDS: {}}
+            initial = {Keys.EVALUATIONS: {}}
             _save_yaml(initial, self.file)
 
     @property
@@ -391,7 +391,7 @@ def _set_key(self, dict, keys: List["str"], value):
 
            self._set_key(dict[keys[0]], keys[1:], value)
 
-    def save_stat(self, key: str, value):
+    def save_model_stat(self, key: str, value):
         """
         Save statistics to an yaml file in the build directory
         """
@@ -402,36 +402,25 @@ def save_stat(self, key: str, value):
 
         _save_yaml(stats_dict, self.file)
 
-    def add_sub_stat(self, parent_key: str, key: str, value):
-        """
-        Save nested statistics to an yaml file in the build directory
-
-        stats[parent_key][key] = value
-        """
-
-        stats_dict = self.stats
-
-        self._set_key(stats_dict, [parent_key, key], value)
-
-        _save_yaml(stats_dict, self.file)
-
-    def add_build_stat(self, key: str, value):
+    def save_model_eval_stat(self, key: str, value):
         stats_dict = self.stats
 
-        self._set_key(stats_dict, [Keys.BUILDS, self.stats_id, key], value)
+        self._set_key(stats_dict, [Keys.EVALUATIONS, self.evaluation_id, key], value)
 
         _save_yaml(stats_dict, self.file)
 
-    def add_build_sub_stat(self, parent_key: str, key: str, value):
+    def save_model_eval_sub_stat(self, parent_key: str, key: str, value):
         stats_dict = self.stats
 
-        self._set_key(stats_dict, [Keys.BUILDS, self.stats_id, parent_key, key], value)
+        self._set_key(
+            stats_dict, [Keys.EVALUATIONS, self.evaluation_id, parent_key, key], value
+        )
 
         _save_yaml(stats_dict, self.file)
 
     @property
-    def build_stats(self):
-        return self.stats[Keys.BUILDS][self.stats_id]
+    def evaluation_stats(self):
+        return self.stats[Keys.EVALUATIONS][self.evaluation_id]
 
 
 def print_cache_dir(_=None):
diff --git a/src/turnkeyml/model_api.py b/src/turnkeyml/model_api.py
index 6b1213b..d7cb155 100644
--- a/src/turnkeyml/model_api.py
+++ b/src/turnkeyml/model_api.py
@@ -21,7 +21,7 @@ def benchmark_model(
     inputs: Dict[str, Any],
     build_name: str,
     iterations: int = 100,
-    stats_id: str = "build",
+    evaluation_id: str = "build",
     cache_dir: str = filesystem.DEFAULT_CACHE_DIR,
     device: str = "x86",
     runtime: Optional[str] = None,
@@ -88,7 +88,7 @@ def benchmark_model(
         build_model(
             model=model,
             inputs=inputs,
-            stats_id=stats_id,
+            evaluation_id=evaluation_id,
             build_name=build_name,
             cache_dir=cache_dir,
             rebuild=rebuild,
@@ -105,7 +105,7 @@ def benchmark_model(
         rt_args_to_use = rt_args
 
     printing.log_info(f"Benchmarking on {device}...")
-    stats = filesystem.Stats(cache_dir, build_name, stats_id)
+    stats = filesystem.Stats(cache_dir, build_name, evaluation_id)
     model_handle = runtime_info["RuntimeClass"](
         cache_dir=cache_dir,
         build_name=build_name,
diff --git a/src/turnkeyml/run/tensorrt/runtime.py b/src/turnkeyml/run/tensorrt/runtime.py
index b6ef9a9..df1270c 100644
--- a/src/turnkeyml/run/tensorrt/runtime.py
+++ b/src/turnkeyml/run/tensorrt/runtime.py
@@ -13,6 +13,7 @@
     average_power_and_utilization,
 )
 
+
 def _get_nvidia_driver_version():
     try:
         output = subprocess.check_output(["nvidia-smi"], text=True)
@@ -23,10 +24,12 @@ def _get_nvidia_driver_version():
                 # Extract and return the driver version
                 return line.split(":")[1].strip().split()[0]
 
-    except Exception as e: # pylint: disable=broad-except
+    except Exception as e:  # pylint: disable=broad-except
         return str(e)
 
     return "Driver not found"
+
+
 class TensorRT(BaseRT):
     def __init__(
         self,
@@ -88,7 +91,7 @@ def _execute(
 
         # Add the GPU driver version to the stats file before execution
         gpu_driver_version = _get_nvidia_driver_version()
-        self.stats.add_build_stat("gpu_driver_version", gpu_driver_version)
+        self.stats.save_model_eval_stat("gpu_driver_version", gpu_driver_version)
 
         power_thread.start()
         run(
diff --git a/test/cli.py b/test/cli.py
index 3cf3835..e555bc7 100644
--- a/test/cli.py
+++ b/test/cli.py
@@ -149,7 +149,7 @@ def assert_success_of_builds(
                 stats = filesystem.Stats(
                     build_state.cache_dir,
                     build_state.config.build_name,
-                    build_state.stats_id,
+                    build_state.evaluation_id,
                 )
                 assert build_state.build_status == build.Status.SUCCESSFUL_BUILD
                 script_build_found = True
@@ -161,11 +161,11 @@ def assert_success_of_builds(
                 ), f"{build_state.info.__dict__[info_property[0]]} == {info_property[1]}"
 
     if check_perf:
-        assert stats.build_stats["mean_latency"] > 0
-        assert stats.build_stats["throughput"] > 0
+        assert stats.evaluation_stats["mean_latency"] > 0
+        assert stats.evaluation_stats["throughput"] > 0
 
     if check_iteration_count:
-        iterations = stats.build_stats["iterations"]
+        iterations = stats.evaluation_stats["iterations"]
         assert iterations == check_iteration_count
 
     if check_opset:
diff --git a/test/helpers/common.py b/test/helpers/common.py
index d5b402e..4a86782 100644
--- a/test/helpers/common.py
+++ b/test/helpers/common.py
@@ -1,4 +1,3 @@
-
 import os
 import shutil
 from typing import Dict
@@ -101,13 +100,13 @@ def forward(self, x):
 }
 
 
-def create_test_dir(key:str, test_scripts: Dict = None):
+def create_test_dir(key: str, test_scripts: Dict = None):
     # Define paths to be used
     base_dir = os.path.dirname(os.path.abspath(__file__))
     cache_dir = os.path.join(base_dir, "generated", f"{key}_cache_dir")
     corpus_dir = os.path.join(base_dir, "generated", f"test_corpus")
-    
-    # Delete folders if they exist and 
+
+    # Delete folders if they exist and
     if os.path.isdir(cache_dir):
         shutil.rmtree(cache_dir)
     if os.path.isdir(corpus_dir):
         shutil.rmtree(corpus_dir)
@@ -124,9 +123,11 @@ def create_test_dir(key:str, test_scripts: Dict = None):
 
     return cache_dir, corpus_dir
 
+
 def strip_dot_py(test_script_file: str) -> str:
     return test_script_file.split(".")[0]
 
+
 def get_stats_and_state(
     test_script: str,
     cache_dir: str,
@@ -141,8 +142,8 @@ def get_stats_and_state(
             stats = filesystem.Stats(
                 build_state.cache_dir,
                 build_state.config.build_name,
-                build_state.stats_id,
+                build_state.evaluation_id,
             )
-            return stats.build_stats, build_state
+            return stats.evaluation_stats, build_state
 
-    raise Exception(f"Stats not found for {test_script}")
\ No newline at end of file
+    raise Exception(f"Stats not found for {test_script}")
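
Note (not part of the patch): the sketch below is a minimal illustration of the renamed Stats API introduced by this diff, assuming turnkeyml is installed so that turnkeyml.common.filesystem imports as in the source tree. The cache directory, build name, stage name, and stat values are hypothetical placeholders; the evaluation_id simply mirrors the f"{device}_{runtime}" pattern used in explore_invocation().

    # Minimal sketch of the renamed Stats API; all names and values are illustrative.
    import turnkeyml.common.filesystem as fs

    stats = fs.Stats(
        cache_dir="example_cache",   # placeholder cache directory
        build_name="example_build",  # placeholder build name
        evaluation_id="x86_ort",     # placeholder, following the f"{device}_{runtime}" pattern
    )

    # Model-wide stats land at the top level of turnkey_stats.yaml
    stats.save_model_stat(fs.Keys.MODEL_NAME, "example_model")

    # Evaluation-specific stats are nested under evaluations/<evaluation_id>/
    stats.save_model_eval_stat("mean_latency", 1.23)
    stats.save_model_eval_sub_stat(
        parent_key=fs.Keys.COMPLETED_BUILD_STAGES,
        key="example_stage",  # hypothetical stage name
        value=0.5,
    )

    # Reads back only the evaluations/<evaluation_id>/ section
    print(stats.evaluation_stats)

After these calls, turnkey_stats.yaml holds the model-level keys at the top level and the per-evaluation keys under an evaluations mapping keyed by evaluation_id, which is the layout the report command iterates over.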