Skip to content

Commit

Permalink
Merge pull request #348 from ChrisCummins/csmith-bin
Browse files Browse the repository at this point in the history
Compile and ship Csmith as part of python package.
  • Loading branch information
ChrisCummins authored Aug 3, 2021
2 parents e94bdb4 + 63acf99 commit 949b6c8
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 116 deletions.
11 changes: 11 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,17 @@ http_archive(
urls = ["https://github.com/pytorch/cpuinfo/archive/63b254577ed77a8004a9be6ac707f3dccc4e1fd9.tar.gz"],
)

# === Csmith ===
# https://embed.cs.utah.edu/csmith/

http_archive(
name = "csmith",
build_file_content = all_content,
sha256 = "ba871c1e5a05a71ecd1af514fedba30561b16ee80b8dd5ba8f884eaded47009f",
strip_prefix = "csmith-csmith-2.3.0",
urls = ["https://github.com/csmith-project/csmith/archive/refs/tags/csmith-2.3.0.tar.gz"],
)

# === DeepDataFlow ===
# https://zenodo.org/record/4122437

Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/datasets/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ py_library(
"llvm_stress.py",
"poj104.py",
],
data = ["//compiler_gym/third_party/csmith:all"],
visibility = ["//visibility:public"],
deps = [
"//compiler_gym/datasets",
Expand Down
150 changes: 34 additions & 116 deletions compiler_gym/envs/llvm/datasets/csmith.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,29 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import io
import logging
import subprocess
import sys
import tarfile
import tempfile
from pathlib import Path
from threading import Lock
from typing import Iterable, List
from typing import Iterable, List, Optional

import numpy as np
from fasteners import InterProcessLock

from compiler_gym.datasets import Benchmark, BenchmarkSource, Dataset
from compiler_gym.datasets.benchmark import BenchmarkInitError, BenchmarkWithSource
from compiler_gym.datasets.dataset import DatasetInitError
from compiler_gym.envs.llvm.llvm_benchmark import ClangInvocation
from compiler_gym.service.proto import BenchmarkDynamicConfig
from compiler_gym.util.decorators import memoized_property
from compiler_gym.util.download import download
from compiler_gym.util.runfiles_path import transient_cache_path
from compiler_gym.util.runfiles_path import runfiles_path
from compiler_gym.util.shell_format import plural
from compiler_gym.util.truncate import truncate

# The maximum value for the --seed argument to csmith.
UINT_MAX = (2 ** 32) - 1

# A lock for exclusive access to the Csmith build logic.
_CSMITH_BUILD_LOCK = Lock()
_CSMITH_BIN = runfiles_path("compiler_gym/third_party/csmith/csmith/bin/csmith")
_CSMITH_INCLUDES = runfiles_path(
"compiler_gym/third_party/csmith/csmith/include/csmith-2.3.0"
)


class CsmithBenchmark(BenchmarkWithSource):
Expand Down Expand Up @@ -68,31 +62,6 @@ def source(self) -> str:
return self._src.decode("utf-8")


class CsmithBuildError(DatasetInitError):
    """Error raised if :meth:`CsmithDataset.install()
    <compiler_gym.datasets.CsmithDataset.install>` fails.

    The error message names the build stage that failed, suggests how to
    install the build dependencies, and appends the captured output of the
    failing stage.
    """

    def __init__(self, failing_stage: str, stdout: str, stderr: str):
        """Constructor.

        :param failing_stage: The name of the build command that failed, e.g.
            ``./configure`` or ``make install``.

        :param stdout: The captured standard output of the failing command.

        :param stderr: The captured standard error of the failing command.
        """
        # Use .get() with a generic fallback rather than indexing: on any
        # platform other than linux/darwin a plain lookup would raise a
        # KeyError here and mask the build failure being reported.
        install_instructions = {
            "linux": "sudo apt install g++ m4",
            "darwin": "brew install m4",
        }.get(sys.platform, "install a C++ compiler and m4 using your package manager")

        super().__init__(
            "\n".join(
                [
                    f"Failed to build Csmith from source, `{failing_stage}` failed.",
                    "You may be missing installation dependencies. Install them using:",
                    f" {install_instructions}",
                    "See https://github.com/csmith-project/csmith#install-csmith for more details",
                    f"--- Start `{failing_stage}` logs: ---\n",
                    stdout,
                    stderr,
                ]
            )
        )


class CsmithDataset(Dataset):
"""A dataset which uses Csmith to generate programs.
Expand Down Expand Up @@ -128,7 +97,28 @@ class CsmithDataset(Dataset):
details.
"""

def __init__(self, site_data_base: Path, sort_order: int = 0):
def __init__(
self,
site_data_base: Path,
sort_order: int = 0,
csmith_bin: Optional[Path] = None,
csmith_includes: Optional[Path] = None,
):
"""Constructor.
:param site_data_base: The base path of a directory that will be used to
store installed files.
:param sort_order: An optional numeric value that should be used to
order this dataset relative to others. Lowest value sorts first.
:param csmith_bin: The path of the Csmith binary to use. If not
provided, the version of Csmith shipped with CompilerGym is used.
:param csmith_includes: The path of the Csmith includes directory. If
not provided, the includes of the Csmith shipped with CompilerGym are
used.
"""
super().__init__(
name="generator://csmith-v0",
description="Random conformant C99 programs",
Expand All @@ -141,93 +131,21 @@ def __init__(self, site_data_base: Path, sort_order: int = 0):
sort_order=sort_order,
benchmark_class=CsmithBenchmark,
)
self.csmith_path = self.site_data_path / "bin" / "csmith"
csmith_include_dir = self.site_data_path / "include" / "csmith-2.3.0"

self._installed = False
self._build_lockfile = self.site_data_path / ".build.LOCK"
self._build_markerfile = self.site_data_path / ".built"

self.csmith_bin_path = csmith_bin or _CSMITH_BIN
self.csmith_includes_path = csmith_includes or _CSMITH_INCLUDES
# The command that is used to compile an LLVM-IR bitcode file from a
# Csmith input. Reads from stdin, writes to stdout.
self.clang_compile_command: List[str] = ClangInvocation.from_c_file(
"-", # Read from stdin.
copt=[
"-xc",
"-xc", # The C programming language.
"-ferror-limit=1", # Stop on first error.
"-w", # No warnings.
f"-I{csmith_include_dir}", # Include the Csmith headers.
f"-I{self.csmith_includes_path}", # Include the Csmith headers.
],
).command(
outpath="-"
) # Write to stdout.

@property
def installed(self) -> bool:
# Fast path for repeated checks to 'installed' without a disk op.
if not self._installed:
self._installed = self._build_markerfile.is_file()
return self._installed

def install(self) -> None:
"""Download and build the Csmith binary."""
super().install()

if self.installed:
return

with _CSMITH_BUILD_LOCK, InterProcessLock(self._build_lockfile):
# Repeat the check to see if we have already installed the dataset
# now that we have acquired the lock.
if not self.installed:
self.logger.info("Downloading and building Csmith")
self._build_csmith(self.site_data_path, self.logger)
self._build_markerfile.touch()

@staticmethod
def _build_csmith(install_root: Path, logger: logging.Logger):
"""Download, build, and install Csmith to the given directory."""
tar_data = io.BytesIO(
download(
urls=[
"https://github.com/csmith-project/csmith/archive/refs/tags/csmith-2.3.0.tar.gz",
],
sha256="ba871c1e5a05a71ecd1af514fedba30561b16ee80b8dd5ba8f884eaded47009f",
)
outpath="-" # Write to stdout.
)
# Csmith uses a standard `configure` + `make install` build process.
with tempfile.TemporaryDirectory(
dir=transient_cache_path("."), prefix="csmith-"
) as d:
with tarfile.open(fileobj=tar_data, mode="r:gz") as arc:
arc.extractall(d)

# The path of the extracted sources.
src_dir = Path(d) / "csmith-csmith-2.3.0"

logger.debug("Configuring Csmith at %s", d)
configure = subprocess.Popen(
["./configure", f"--prefix={install_root}"],
cwd=src_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
)
stdout, stderr = configure.communicate(timeout=600)
if configure.returncode:
raise CsmithBuildError("./configure", stdout, stderr)

logger.debug("Installing Csmith to %s", install_root)
make = subprocess.Popen(
["make", "-j", "install"],
cwd=src_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
)
stdout, stderr = make.communicate(timeout=600)
if make.returncode:
raise CsmithBuildError("make install", stdout, stderr)

@property
def size(self) -> int:
Expand Down Expand Up @@ -271,7 +189,7 @@ def benchmark_from_seed(
# assemble a bitcode.
self.logger.debug("Exec csmith --seed %d", seed)
csmith = subprocess.Popen(
[str(self.csmith_path), "--seed", str(seed)],
[str(self.csmith_bin_path), "--seed", str(seed)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
Expand Down
30 changes: 30 additions & 0 deletions compiler_gym/third_party/csmith/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
load("@rules_foreign_cc//tools/build_defs:configure.bzl", "configure_make")

# Funnel the Csmith sources through a no-op filegroup to fix an issue in
# collecting the generated files for use in `data` attributes of some targets.
# See: https://github.com/bazelbuild/rules_foreign_cc/issues/619
filegroup(
name = "all",
srcs = [":csmith"],
visibility = ["//visibility:public"],
)

configure_make(
name = "csmith",
binaries = ["csmith"],
configure_env_vars = {
# Workaround error with libtool usage on macOS. See:
# https://github.com/bazelbuild/rules_foreign_cc/issues/185
"AR": "/usr/bin/ar",
# Csmith uses deprecated stdlib functions like std::bind2nd().
"CXXFLAGS": "-D_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES",
},
# Work around a strange bug where the srand48_deterministic test returns
# true on macOS, although this is only available and needed for OpenBSD.
configure_options = ["ac_cv_func_srand48_deterministic=no"],
lib_source = "@csmith//:all",
)
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def get_tag(self):
"envs/llvm/service/passes/*.txt",
"third_party/cbench/benchmarks.txt",
"third_party/cbench/cbench-v*/*",
"third_party/csmith/csmith/bin/csmith",
"third_party/csmith/csmith/include/csmith-2.3.0/*.h",
"third_party/inst2vec/*.pickle",
]
},
Expand Down

0 comments on commit 949b6c8

Please sign in to comment.