From ea37391f2ee33aae70d15ce82d3e7db770128d8e Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 12 May 2021 21:20:40 +0100 Subject: [PATCH 1/2] [llvm] Remove an unused Benchmark.hash() member. This value is not used so remove it. --- compiler_gym/envs/llvm/service/Benchmark.cc | 2 -- compiler_gym/envs/llvm/service/Benchmark.h | 9 +-------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/compiler_gym/envs/llvm/service/Benchmark.cc b/compiler_gym/envs/llvm/service/Benchmark.cc index 9f136dffc..03cce266a 100644 --- a/compiler_gym/envs/llvm/service/Benchmark.cc +++ b/compiler_gym/envs/llvm/service/Benchmark.cc @@ -100,7 +100,6 @@ Benchmark::Benchmark(const std::string& name, const Bitcode& bitcode, : context_(std::make_unique()), module_(makeModuleOrDie(*context_, bitcode, name)), baselineCosts_(baselineCosts), - hash_(getModuleHash(*module_)), name_(name), bitcodeSize_(bitcode.size()) {} @@ -110,7 +109,6 @@ Benchmark::Benchmark(const std::string& name, std::unique_ptr : context_(std::move(context)), module_(std::move(module)), baselineCosts_(baselineCosts), - hash_(getModuleHash(*module_)), name_(name), bitcodeSize_(bitcodeSize) {} diff --git a/compiler_gym/envs/llvm/service/Benchmark.h b/compiler_gym/envs/llvm/service/Benchmark.h index 713c65b99..972fbcb6d 100644 --- a/compiler_gym/envs/llvm/service/Benchmark.h +++ b/compiler_gym/envs/llvm/service/Benchmark.h @@ -17,11 +17,7 @@ namespace compiler_gym::llvm_service { -// We identify benchmarks using a hash of the LLVM module, which is a -// 160 bits SHA1. -// -// NOTE(cummins): In the future when we extend this to support optimizing for -// performance, we would need this +// A 160 bits SHA1 that identifies an LLVM module. using BenchmarkHash = llvm::ModuleHash; using Bitcode = llvm::SmallString<0>; @@ -66,8 +62,6 @@ class Benchmark { inline const llvm::Module* module_ptr() const { return module_.get(); } - inline const BenchmarkHash hash() const { return hash_; } - // Replace the benchmark module with a new one. This is to enable // out-of-process modification of the IR by serializing the benchmark to a // file, modifying the file, then loading the modified file and updating the @@ -81,7 +75,6 @@ class Benchmark { std::unique_ptr context_; std::unique_ptr module_; const BaselineCosts baselineCosts_; - const BenchmarkHash hash_; const std::string name_; // The length of the bitcode string for this benchmark. const size_t bitcodeSize_; From 1099afefb6b50eba3bb3809e3688ff60b325124e Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 12 May 2021 22:00:16 +0100 Subject: [PATCH 2/2] [llvm] Add an IrSha1 observation space. This adds a new `IrSha1` observation space that is a 40-digit SHA1 checksum of the current module state. --- compiler_gym/envs/llvm/service/Benchmark.cc | 2 ++ compiler_gym/envs/llvm/service/Benchmark.h | 3 +++ compiler_gym/envs/llvm/service/LlvmSession.cc | 13 +++++++++++++ .../envs/llvm/service/ObservationSpaces.cc | 8 ++++++++ .../envs/llvm/service/ObservationSpaces.h | 2 ++ tests/llvm/observation_spaces_test.py | 19 +++++++++++++++++++ 6 files changed, 47 insertions(+) diff --git a/compiler_gym/envs/llvm/service/Benchmark.cc b/compiler_gym/envs/llvm/service/Benchmark.cc index 03cce266a..52d5de3a5 100644 --- a/compiler_gym/envs/llvm/service/Benchmark.cc +++ b/compiler_gym/envs/llvm/service/Benchmark.cc @@ -120,4 +120,6 @@ std::unique_ptr Benchmark::clone(const fs::path& workingDirectory) co return std::make_unique(name(), bitcode, workingDirectory, baselineCosts()); } +BenchmarkHash Benchmark::module_hash() const { return getModuleHash(*module_); } + } // namespace compiler_gym::llvm_service diff --git a/compiler_gym/envs/llvm/service/Benchmark.h b/compiler_gym/envs/llvm/service/Benchmark.h index 972fbcb6d..d9349ef47 100644 --- a/compiler_gym/envs/llvm/service/Benchmark.h +++ b/compiler_gym/envs/llvm/service/Benchmark.h @@ -43,6 +43,9 @@ class Benchmark { // Make a copy of the benchmark. std::unique_ptr clone(const boost::filesystem::path& workingDirectory) const; + // Compute and return a SHA1 hash of the module. + BenchmarkHash module_hash() const; + inline const std::string& name() const { return name_; } inline const size_t bitcodeSize() const { return bitcodeSize_; } diff --git a/compiler_gym/envs/llvm/service/LlvmSession.cc b/compiler_gym/envs/llvm/service/LlvmSession.cc index cd33d2ed2..21fb07a00 100644 --- a/compiler_gym/envs/llvm/service/LlvmSession.cc +++ b/compiler_gym/envs/llvm/service/LlvmSession.cc @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -270,6 +271,18 @@ Status LlvmSession::getObservation(LlvmObservationSpace space, Observation* repl reply->set_string_value(ir); break; } + case LlvmObservationSpace::IR_SHA1: { + std::stringstream ss; + const BenchmarkHash hash = benchmark().module_hash(); + // Hex encode, zero pad, and concatenate the unsigned integers that + // contain the hash. + for (uint32_t val : hash) { + ss << std::setfill('0') << std::setw(sizeof(BenchmarkHash::value_type) * 2) << std::hex + << val; + } + reply->set_string_value(ss.str()); + break; + } case LlvmObservationSpace::BITCODE_FILE: { // Generate an output path with 16 bits of randomness. const auto outpath = fs::unique_path(workingDirectory_ / "module-%%%%%%%%.bc"); diff --git a/compiler_gym/envs/llvm/service/ObservationSpaces.cc b/compiler_gym/envs/llvm/service/ObservationSpaces.cc index c2c5bd8c5..fd65d0a29 100644 --- a/compiler_gym/envs/llvm/service/ObservationSpaces.cc +++ b/compiler_gym/envs/llvm/service/ObservationSpaces.cc @@ -37,6 +37,14 @@ std::vector getLlvmObservationSpaceList() { space.set_platform_dependent(false); break; } + case LlvmObservationSpace::IR_SHA1: { + ScalarRange sha1Size; + space.mutable_string_size_range()->mutable_min()->set_value(40); + space.mutable_string_size_range()->mutable_max()->set_value(40); + space.set_deterministic(true); + space.set_platform_dependent(false); + break; + } case LlvmObservationSpace::BITCODE_FILE: { ScalarRange pathLength; space.mutable_string_size_range()->mutable_min()->set_value(0); diff --git a/compiler_gym/envs/llvm/service/ObservationSpaces.h b/compiler_gym/envs/llvm/service/ObservationSpaces.h index 23c75f0ce..358ebaa73 100644 --- a/compiler_gym/envs/llvm/service/ObservationSpaces.h +++ b/compiler_gym/envs/llvm/service/ObservationSpaces.h @@ -23,6 +23,8 @@ enum class LlvmObservationSpace { // The entire LLVM module as an IR string. This allows the user to do its own // feature extraction. IR, + // The 40-digit hex SHA1 checksum of the LLVM module. + IR_SHA1, // Write the bitcode to a file. Returns a string, which is the path of the // written file. BITCODE_FILE, diff --git a/tests/llvm/observation_spaces_test.py b/tests/llvm/observation_spaces_test.py index 9d76826e8..72dfdc46f 100644 --- a/tests/llvm/observation_spaces_test.py +++ b/tests/llvm/observation_spaces_test.py @@ -39,6 +39,7 @@ def test_observation_spaces(env: LlvmEnv): assert set(env.observation.spaces.keys()) == { "Ir", + "IrSha1", "BitcodeFile", "InstCount", "InstCountDict", @@ -79,6 +80,24 @@ def test_ir_observation_space(env: LlvmEnv): assert not space.platform_dependent +def test_ir_sha1_observation_space(env: LlvmEnv): + env.reset("cbench-v1/crc32") + key = "IrSha1" + space = env.observation.spaces[key] + assert isinstance(space.space, Sequence) + assert space.space.dtype == str + assert space.space.size_range == (40, 40) + + value: str = env.observation[key] + print(value) # For debugging in case of error. + assert isinstance(value, str) + assert len(value) == 40 + assert space.space.contains(value) + + assert space.deterministic + assert not space.platform_dependent + + def test_bitcode_observation_space(env: LlvmEnv): env.reset("cbench-v1/crc32") key = "BitcodeFile"