From b69edb155750b6543f1df1f064340985527b3c26 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Fri, 30 Jul 2021 09:47:46 +0000 Subject: [PATCH] Use 0 for size of infinite datasets. This changes the value of the `size` property for datasets from `float("inf")` to `0` if the size of the dataset is unknown or unbounded. This is because the size property is used as the return value of `len()`, and a length cannot be a floating-point number. Fixes #303. --- compiler_gym/bin/service.py | 4 ++-- compiler_gym/datasets/dataset.py | 24 ++++++++++--------- compiler_gym/envs/llvm/datasets/csmith.py | 4 ++-- .../envs/llvm/datasets/llvm_stress.py | 2 +- tests/llvm/datasets/csmith_test.py | 3 ++- tests/llvm/datasets/llvm_stress_test.py | 3 ++- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/compiler_gym/bin/service.py b/compiler_gym/bin/service.py index ec83b4866..c9c10ad41 100644 --- a/compiler_gym/bin/service.py +++ b/compiler_gym/bin/service.py @@ -75,7 +75,6 @@ import sys from typing import Iterable -import humanize from absl import app, flags from compiler_gym.datasets import Dataset @@ -132,7 +131,8 @@ def summarize_datasets(datasets: Iterable[Dataset]) -> str: [ ( n, - humanize.intcomma(f) if f >= 0 else "∞", + # A size of zero means infinite. + f"{f:,d}" if f > 0 else "∞", l, v, ) diff --git a/compiler_gym/datasets/dataset.py b/compiler_gym/datasets/dataset.py index 9b110bc56..3ee7bb71e 100644 --- a/compiler_gym/datasets/dataset.py +++ b/compiler_gym/datasets/dataset.py @@ -233,21 +233,19 @@ def site_data_size_in_bytes(self) -> int: ) return total_size - # We use Union[int, float] to represent the size because infinite size is - # represented by math.inf, which is a float. For all other sizes this should - # be an int. @property - def size(self) -> Union[int, float]: - """The number of benchmarks in the dataset. 
If the number of benchmarks - is unbounded, for example because the dataset represents a program - generator that can produce an infinite number of programs, the value is - :code:`math.inf`. + def size(self) -> int: + """The number of benchmarks in the dataset. + + If the number of benchmarks is unknown or unbounded, for example because + the dataset represents a program generator that can produce an infinite + number of programs, the value is 0. - :type: Union[int, float] + :type: int """ return 0 - def __len__(self) -> Union[int, float]: + def __len__(self) -> int: """The number of benchmarks in the dataset. This is the same as :meth:`Dataset.size @@ -256,7 +254,11 @@ def __len__(self) -> Union[int, float]: >>> len(dataset) == dataset.size True - :return: An integer, or :code:`math.float`. + If the number of benchmarks is unknown or unbounded, for example because + the dataset represents a program generator that can produce an infinite + number of programs, the value is 0. + + :return: An integer. """ return self.size diff --git a/compiler_gym/envs/llvm/datasets/csmith.py b/compiler_gym/envs/llvm/datasets/csmith.py index 1204439a4..97f241854 100644 --- a/compiler_gym/envs/llvm/datasets/csmith.py +++ b/compiler_gym/envs/llvm/datasets/csmith.py @@ -230,10 +230,10 @@ def _build_csmith(install_root: Path, logger: logging.Logger): raise CsmithBuildError("make install", stdout, stderr) @property - def size(self) -> float: + def size(self) -> int: # Actually 2^32 - 1, but practically infinite for all intents and # purposes. 
- return float("inf") + return 0 def benchmark_uris(self) -> Iterable[str]: return (f"{self.name}/{i}" for i in range(UINT_MAX)) diff --git a/compiler_gym/envs/llvm/datasets/llvm_stress.py b/compiler_gym/envs/llvm/datasets/llvm_stress.py index 6c9aeca8f..130add7a7 100644 --- a/compiler_gym/envs/llvm/datasets/llvm_stress.py +++ b/compiler_gym/envs/llvm/datasets/llvm_stress.py @@ -50,7 +50,7 @@ def __init__(self, site_data_base: Path, sort_order: int = 0): def size(self) -> int: # Actually 2^32 - 1, but practically infinite for all intents and # purposes. - return float("inf") + return 0 def benchmark_uris(self) -> Iterable[str]: return (f"{self.name}/{i}" for i in range(UINT_MAX)) diff --git a/tests/llvm/datasets/csmith_test.py b/tests/llvm/datasets/csmith_test.py index 8d07b3a01..7625cf614 100644 --- a/tests/llvm/datasets/csmith_test.py +++ b/tests/llvm/datasets/csmith_test.py @@ -30,7 +30,8 @@ def csmith_dataset() -> CsmithDataset: def test_csmith_size(csmith_dataset: CsmithDataset): - assert csmith_dataset.size == float("inf") + assert csmith_dataset.size == 0 + assert len(csmith_dataset) == 0 @pytest.mark.parametrize("index", range(3) if is_ci() else range(250)) diff --git a/tests/llvm/datasets/llvm_stress_test.py b/tests/llvm/datasets/llvm_stress_test.py index cc5e65117..303616be4 100644 --- a/tests/llvm/datasets/llvm_stress_test.py +++ b/tests/llvm/datasets/llvm_stress_test.py @@ -31,7 +31,8 @@ def llvm_stress_dataset() -> LlvmStressDataset: def test_llvm_stress_size(llvm_stress_dataset: LlvmStressDataset): - assert llvm_stress_dataset.size == float("inf") + assert llvm_stress_dataset.size == 0 + assert len(llvm_stress_dataset) == 0 @pytest.mark.parametrize("index", range(3) if is_ci() else range(250))