From 1099afefb6b50eba3bb3809e3688ff60b325124e Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Wed, 12 May 2021 22:00:16 +0100
Subject: [PATCH] [llvm] Add an IrSha1 observation space.

This adds a new `IrSha1` observation space that is a 40-digit SHA1
checksum of the current module state.
---
 compiler_gym/envs/llvm/service/Benchmark.cc   |  2 ++
 compiler_gym/envs/llvm/service/Benchmark.h    |  3 +++
 compiler_gym/envs/llvm/service/LlvmSession.cc | 13 +++++++++++++
 .../envs/llvm/service/ObservationSpaces.cc    |  8 ++++++++
 .../envs/llvm/service/ObservationSpaces.h     |  2 ++
 tests/llvm/observation_spaces_test.py         | 19 +++++++++++++++++++
 6 files changed, 47 insertions(+)

diff --git a/compiler_gym/envs/llvm/service/Benchmark.cc b/compiler_gym/envs/llvm/service/Benchmark.cc
index 03cce266a..52d5de3a5 100644
--- a/compiler_gym/envs/llvm/service/Benchmark.cc
+++ b/compiler_gym/envs/llvm/service/Benchmark.cc
@@ -120,4 +120,6 @@ std::unique_ptr Benchmark::clone(const fs::path& workingDirectory) co
   return std::make_unique(name(), bitcode, workingDirectory, baselineCosts());
 }
 
+BenchmarkHash Benchmark::module_hash() const { return getModuleHash(*module_); }
+
 } // namespace compiler_gym::llvm_service
diff --git a/compiler_gym/envs/llvm/service/Benchmark.h b/compiler_gym/envs/llvm/service/Benchmark.h
index 972fbcb6d..d9349ef47 100644
--- a/compiler_gym/envs/llvm/service/Benchmark.h
+++ b/compiler_gym/envs/llvm/service/Benchmark.h
@@ -43,6 +43,9 @@ class Benchmark {
   // Make a copy of the benchmark.
   std::unique_ptr clone(const boost::filesystem::path& workingDirectory) const;
 
+  // Compute and return a SHA1 hash of the module.
+  BenchmarkHash module_hash() const;
+
   inline const std::string& name() const { return name_; }
 
   inline const size_t bitcodeSize() const { return bitcodeSize_; }
diff --git a/compiler_gym/envs/llvm/service/LlvmSession.cc b/compiler_gym/envs/llvm/service/LlvmSession.cc
index cd33d2ed2..21fb07a00 100644
--- a/compiler_gym/envs/llvm/service/LlvmSession.cc
+++ b/compiler_gym/envs/llvm/service/LlvmSession.cc
@@ -8,6 +8,7 @@
 #include
 #include
 
+#include
 #include
 #include
 
@@ -270,6 +271,18 @@ Status LlvmSession::getObservation(LlvmObservationSpace space, Observation* repl
       reply->set_string_value(ir);
       break;
     }
+    case LlvmObservationSpace::IR_SHA1: {
+      std::stringstream ss;
+      const BenchmarkHash hash = benchmark().module_hash();
+      // Hex encode, zero pad, and concatenate the unsigned integers that
+      // contain the hash.
+      for (uint32_t val : hash) {
+        ss << std::setfill('0') << std::setw(sizeof(BenchmarkHash::value_type) * 2) << std::hex
+           << val;
+      }
+      reply->set_string_value(ss.str());
+      break;
+    }
     case LlvmObservationSpace::BITCODE_FILE: {
       // Generate an output path with 16 bits of randomness.
const auto outpath = fs::unique_path(workingDirectory_ / "module-%%%%%%%%.bc"); diff --git a/compiler_gym/envs/llvm/service/ObservationSpaces.cc b/compiler_gym/envs/llvm/service/ObservationSpaces.cc index c2c5bd8c5..fd65d0a29 100644 --- a/compiler_gym/envs/llvm/service/ObservationSpaces.cc +++ b/compiler_gym/envs/llvm/service/ObservationSpaces.cc @@ -37,6 +37,14 @@ std::vector getLlvmObservationSpaceList() { space.set_platform_dependent(false); break; } + case LlvmObservationSpace::IR_SHA1: { + ScalarRange sha1Size; + space.mutable_string_size_range()->mutable_min()->set_value(40); + space.mutable_string_size_range()->mutable_max()->set_value(40); + space.set_deterministic(true); + space.set_platform_dependent(false); + break; + } case LlvmObservationSpace::BITCODE_FILE: { ScalarRange pathLength; space.mutable_string_size_range()->mutable_min()->set_value(0); diff --git a/compiler_gym/envs/llvm/service/ObservationSpaces.h b/compiler_gym/envs/llvm/service/ObservationSpaces.h index 23c75f0ce..358ebaa73 100644 --- a/compiler_gym/envs/llvm/service/ObservationSpaces.h +++ b/compiler_gym/envs/llvm/service/ObservationSpaces.h @@ -23,6 +23,8 @@ enum class LlvmObservationSpace { // The entire LLVM module as an IR string. This allows the user to do its own // feature extraction. IR, + // The 40-digit hex SHA1 checksum of the LLVM module. + IR_SHA1, // Write the bitcode to a file. Returns a string, which is the path of the // written file. 
   BITCODE_FILE,
diff --git a/tests/llvm/observation_spaces_test.py b/tests/llvm/observation_spaces_test.py
index 9d76826e8..72dfdc46f 100644
--- a/tests/llvm/observation_spaces_test.py
+++ b/tests/llvm/observation_spaces_test.py
@@ -39,6 +39,7 @@ def test_observation_spaces(env: LlvmEnv):
 
     assert set(env.observation.spaces.keys()) == {
         "Ir",
+        "IrSha1",
         "BitcodeFile",
         "InstCount",
         "InstCountDict",
@@ -79,6 +80,24 @@ def test_ir_observation_space(env: LlvmEnv):
     assert not space.platform_dependent
 
 
+def test_ir_sha1_observation_space(env: LlvmEnv):
+    env.reset("cbench-v1/crc32")
+    key = "IrSha1"
+    space = env.observation.spaces[key]
+    assert isinstance(space.space, Sequence)
+    assert space.space.dtype == str
+    assert space.space.size_range == (40, 40)
+
+    value: str = env.observation[key]
+    print(value)  # For debugging in case of error.
+    assert isinstance(value, str)
+    assert len(value) == 40
+    assert space.space.contains(value)
+
+    assert space.deterministic
+    assert not space.platform_dependent
+
+
 def test_bitcode_observation_space(env: LlvmEnv):
     env.reset("cbench-v1/crc32")
     key = "BitcodeFile"