Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 0 for size of infinite datasets. #347

Merged
merged 1 commit into from
Jul 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions compiler_gym/bin/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@
import sys
from typing import Iterable

import humanize
from absl import app, flags

from compiler_gym.datasets import Dataset
Expand Down Expand Up @@ -132,7 +131,8 @@ def summarize_datasets(datasets: Iterable[Dataset]) -> str:
[
(
n,
humanize.intcomma(f) if f >= 0 else "∞",
# A size of zero means infinite.
f"{f:,d}" if f > 0 else "∞",
l,
v,
)
Expand Down
24 changes: 13 additions & 11 deletions compiler_gym/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,21 +233,19 @@ def site_data_size_in_bytes(self) -> int:
)
return total_size

# We use Union[int, float] to represent the size because infinite size is
# represented by math.inf, which is a float. For all other sizes this should
# be an int.
@property
def size(self) -> Union[int, float]:
"""The number of benchmarks in the dataset. If the number of benchmarks
is unbounded, for example because the dataset represents a program
generator that can produce an infinite number of programs, the value is
:code:`math.inf`.
def size(self) -> int:
"""The number of benchmarks in the dataset.

If the number of benchmarks is unknown or unbounded, for example because
the dataset represents a program generator that can produce an infinite
number of programs, the value is 0.

:type: Union[int, float]
:type: int
"""
return 0

def __len__(self) -> Union[int, float]:
def __len__(self) -> int:
"""The number of benchmarks in the dataset.

This is the same as :meth:`Dataset.size
Expand All @@ -256,7 +254,11 @@ def __len__(self) -> Union[int, float]:
>>> len(dataset) == dataset.size
True

:return: An integer, or :code:`math.float`.
If the number of benchmarks is unknown or unbounded, for example because
the dataset represents a program generator that can produce an infinite
number of programs, the value is 0.

:return: An integer.
"""
return self.size

Expand Down
4 changes: 2 additions & 2 deletions compiler_gym/envs/llvm/datasets/csmith.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,10 @@ def _build_csmith(install_root: Path, logger: logging.Logger):
raise CsmithBuildError("make install", stdout, stderr)

@property
def size(self) -> float:
def size(self) -> int:
# Actually 2^32 - 1, but practically infinite for all intents and
# purposes.
return float("inf")
return 0

def benchmark_uris(self) -> Iterable[str]:
return (f"{self.name}/{i}" for i in range(UINT_MAX))
Expand Down
2 changes: 1 addition & 1 deletion compiler_gym/envs/llvm/datasets/llvm_stress.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, site_data_base: Path, sort_order: int = 0):
def size(self) -> int:
# Actually 2^32 - 1, but practically infinite for all intents and
# purposes.
return float("inf")
return 0

def benchmark_uris(self) -> Iterable[str]:
return (f"{self.name}/{i}" for i in range(UINT_MAX))
Expand Down
3 changes: 2 additions & 1 deletion tests/llvm/datasets/csmith_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def csmith_dataset() -> CsmithDataset:


def test_csmith_size(csmith_dataset: CsmithDataset):
assert csmith_dataset.size == float("inf")
assert csmith_dataset.size == 0
assert len(csmith_dataset) == 0


@pytest.mark.parametrize("index", range(3) if is_ci() else range(250))
Expand Down
3 changes: 2 additions & 1 deletion tests/llvm/datasets/llvm_stress_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def llvm_stress_dataset() -> LlvmStressDataset:


def test_llvm_stress_size(llvm_stress_dataset: LlvmStressDataset):
assert llvm_stress_dataset.size == float("inf")
assert llvm_stress_dataset.size == 0
assert len(llvm_stress_dataset) == 0


@pytest.mark.parametrize("index", range(3) if is_ci() else range(250))
Expand Down