Skip to content

Commit

Permalink
Merge pull request #278 from ethereum/synth_benchmarks
Browse files Browse the repository at this point in the history
EVM instructions synthetic benchmarks
  • Loading branch information
chfast authored Mar 9, 2021
2 parents 1151744 + 0b3c2c4 commit b9facfa
Show file tree
Hide file tree
Showing 4 changed files with 303 additions and 11 deletions.
14 changes: 8 additions & 6 deletions test/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ target_sources(
evmone-bench PRIVATE
bench.cpp
helpers.hpp
synthetic_benchmarks.cpp synthetic_benchmarks.hpp
)

set(HAVE_STD_FILESYSTEM 0)
Expand Down Expand Up @@ -46,11 +47,12 @@ set(PREFIX evmone/bench)

# Check if DIR argument works.
add_test(NAME ${PREFIX}/dir COMMAND evmone-bench ${CMAKE_CURRENT_SOURCE_DIR}/../benchmarks --benchmark_list_tests)
set_tests_properties(${PREFIX}/dir PROPERTIES PASS_REGULAR_EXPRESSION "execute/synth")

# Empty DIR name should run no benchmarks.
add_test(NAME ${PREFIX}/dirname_empty COMMAND evmone-bench "" --benchmark_list_tests)
set_tests_properties(${PREFIX}/dirname_empty PROPERTIES PASS_REGULAR_EXPRESSION "Failed to match any benchmarks")
# Omitting DIR is fine.
add_test(NAME ${PREFIX}/no_dir COMMAND evmone-bench --benchmark_list_tests)
set_tests_properties(${PREFIX}/no_dir PROPERTIES PASS_REGULAR_EXPRESSION "execute/synth")

# Missing DIR argument is an error.
add_test(NAME ${PREFIX}/no_dir COMMAND evmone-bench)
set_tests_properties(${PREFIX}/no_dir PROPERTIES PASS_REGULAR_EXPRESSION "DIR argument .* missing")
# Empty DIR name should list only built-in benchmarks
add_test(NAME ${PREFIX}/dirname_empty COMMAND evmone-bench "" --benchmark_list_tests)
set_tests_properties(${PREFIX}/dirname_empty PROPERTIES PASS_REGULAR_EXPRESSION "execute/synth")
14 changes: 9 additions & 5 deletions test/bench/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// SPDX-License-Identifier: Apache-2.0

#include "helpers.hpp"
#include "synthetic_benchmarks.hpp"
#include <benchmark/benchmark.h>
#include <evmc/evmc.hpp>
#include <evmc/loader.h>
Expand Down Expand Up @@ -190,12 +191,14 @@ constexpr auto cli_parsing_error = -3;
///
/// The following variants of number arguments are supported (including argv[0]):
///
/// 1: evmone-bench
/// Uses evmone VMs, only synthetic benchmarks are available.
/// 2: evmone-bench benchmarks_dir
/// Uses evmone VM, loads all benchmarks from benchmarks_dir.
/// Uses evmone VMs, loads all benchmarks from benchmarks_dir.
/// 3: evmone-bench evmc_config benchmarks_dir
/// The same as (2) but loads custom EVMC VM.
/// The same as (2) but loads additional custom EVMC VM.
/// 4: evmone-bench code_hex_file input_hex expected_output_hex.
/// Uses evmone VM, registers custom benchmark with the code from the given file,
/// Uses evmone VMs, registers custom benchmark with the code from the given file,
/// and the given input. The benchmark will compare the output with the provided
/// expected one.
std::tuple<int, std::vector<BenchmarkCase>> parseargs(int argc, char** argv)
Expand All @@ -210,8 +213,8 @@ std::tuple<int, std::vector<BenchmarkCase>> parseargs(int argc, char** argv)
switch (argc)
{
case 1:
std::cerr << "DIR argument (path to a directory with benchmarks) missing\n";
return {cli_parsing_error, {}};
// Run with built-in synthetic benchmarks only.
break;
case 2:
benchmarks_dir = argv[1];
break;
Expand Down Expand Up @@ -287,6 +290,7 @@ int main(int argc, char** argv)
registered_vms["advanced"] = evmc::VM{evmc_create_evmone(), {{"O", "2"}}};
registered_vms["baseline"] = evmc::VM{evmc_create_evmone(), {{"O", "0"}}};
register_benchmarks(benchmark_cases);
register_synthetic_benchmarks();
RunSpecifiedBenchmarks();
return 0;
}
Expand Down
277 changes: 277 additions & 0 deletions test/bench/synthetic_benchmarks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
// evmone: Fast Ethereum Virtual Machine implementation
// Copyright 2020 The evmone Authors.
// SPDX-License-Identifier: Apache-2.0

#include "synthetic_benchmarks.hpp"
#include "helpers.hpp"
#include "test/utils/bytecode.hpp"
#include <evmc/instructions.h>
#include <evmone/instruction_traits.hpp>

using namespace benchmark;

namespace evmone::test
{
namespace
{
/// Number of stack slots the inner code of the benchmark loop may occupy.
/// The EVM stack holds at most 1024 items and one of them is permanently taken
/// by the loop counter.
constexpr int stack_limit = 1024 - 1;

/// Selects how much of the EVM stack the generated benchmark code occupies.
/// The numeric value of the mode is appended to the benchmark name,
/// so the values must stay stable.
enum class Mode : int
{
    /// The code uses as minimal stack as possible.
    min_stack = 0,

    /// The code fills the stack up to its limit.
    full_stack = 1,
};

/// Groups instructions by their EVM stack requirements.
/// The character value of a category is embedded in the benchmark name,
/// so the values must stay stable.
enum class InstructionCategory : char
{
    /// No-op instruction.
    nop = 'n',

    /// Nullary operator - produces a result without any stack input.
    nullop = 'a',

    /// Unary operator.
    unop = 'u',

    /// Binary operator.
    binop = 'b',

    /// PUSH instruction.
    push = 'p',

    /// DUP instruction.
    dup = 'd',

    /// SWAP instruction.
    swap = 's',

    /// Not any of the categories above.
    other = 'X',
};

/// Classifies the opcode by its stack behavior.
///
/// The PUSH, SWAP and DUP families are recognized by opcode range. All other
/// opcodes are classified by the (required stack height, stack height change)
/// pair from the instruction traits table; combinations not listed here
/// yield InstructionCategory::other.
constexpr InstructionCategory get_instruction_category(evmc_opcode opcode) noexcept
{
    // Opcode families identified purely by their opcode range.
    if (opcode >= OP_PUSH1 && opcode <= OP_PUSH32)
        return InstructionCategory::push;
    if (opcode >= OP_SWAP1 && opcode <= OP_SWAP16)
        return InstructionCategory::swap;
    if (opcode >= OP_DUP1 && opcode <= OP_DUP16)
        return InstructionCategory::dup;

    // Everything else is classified by its stack traits.
    const auto traits = instr::traits[opcode];
    const auto required = traits.stack_height_required;
    const auto change = traits.stack_height_change;

    if (required == 0)
    {
        if (change == 0)
            return InstructionCategory::nop;
        if (change == 1)
            return InstructionCategory::nullop;
    }
    else if (required == 1 && change == 0)
    {
        return InstructionCategory::unop;
    }
    else if (required == 2 && change == -1)
    {
        return InstructionCategory::binop;
    }

    return InstructionCategory::other;
}

/// Identifies a single synthetic benchmark code: the instruction under test
/// and the stack usage mode of the generated code.
///
/// Kept as a plain aggregate with this member order: it is brace-initialized
/// as {opcode, mode} and decomposed with structured bindings.
struct CodeParams
{
evmc_opcode opcode;  ///< The instruction being benchmarked.
Mode mode;  ///< The stack usage mode of the generated code.
};

/// Lexicographic less-than for CodeParams: ordered by opcode first, then by mode.
/// Needed to use CodeParams as the key of std::map.
[[maybe_unused]] inline constexpr bool operator<(const CodeParams& a, const CodeParams& b) noexcept
{
    if (a.opcode != b.opcode)
        return a.opcode < b.opcode;
    return a.mode < b.mode;
}

std::string to_string(const CodeParams& params)
{
return std::string{instr::traits[params.opcode].name} + '/' +
static_cast<char>(get_instruction_category(params.opcode)) +
std::to_string(static_cast<int>(params.mode));
}

/// Generates the loop inner code for Mode::min_stack: the instruction is repeated
/// while keeping the stack as shallow as possible.
/// Returns empty code for instruction categories not supported in this mode.
bytecode generate_min_stack_inner_code(evmc_opcode opcode)
{
    switch (get_instruction_category(opcode))
    {
    case InstructionCategory::nop:
        // JUMPDEST JUMPDEST ...
        return stack_limit * 2 * bytecode{opcode};

    case InstructionCategory::nullop:
        // CALLER POP CALLER POP ...
        return stack_limit * (bytecode{opcode} + OP_POP);

    case InstructionCategory::unop:
        // DUP1 NOT NOT ... POP
        return OP_DUP1 + stack_limit * 2 * bytecode{opcode} + OP_POP;

    case InstructionCategory::binop:
        // DUP1 DUP1 ADD DUP1 ADD DUP1 ADD ... POP
        return OP_DUP1 + (stack_limit - 1) * (OP_DUP1 + bytecode{opcode}) + OP_POP;

    case InstructionCategory::push:
        // PUSH1 POP PUSH1 POP ...
        return stack_limit * (push(opcode, {}) + OP_POP);

    case InstructionCategory::dup:
    {
        // DUPn needs stack height n: the loop counter is duplicated n-1 times with DUP1.
        const auto n = opcode - OP_DUP1 + 1;

        // DUP1 ...  DUPn POP DUPn POP ...  POP ...
        // \ n-1 /                          \ n-1 /
        const auto prologue = (n - 1) * OP_DUP1;
        const auto dup_pop_pairs = (stack_limit - (n - 1)) * (bytecode{opcode} + OP_POP);
        const auto epilogue = (n - 1) * OP_POP;  // Drops the initially duplicated values.
        return prologue + dup_pop_pairs + epilogue;
    }

    case InstructionCategory::swap:
    {
        // SWAPn needs stack height n+1: the loop counter is duplicated n times with DUP1.
        // All swapped items hold the same value, so the loop counter remains unchanged.
        const auto n = opcode - OP_SWAP1 + 1;

        // DUP1 ...  SWAPn SWAPn ...  POP ...
        // \  n  /                    \  n  /
        const auto prologue = n * OP_DUP1;
        const auto swaps = stack_limit * 2 * bytecode{opcode};
        const auto epilogue = n * OP_POP;  // Drops the initially duplicated values.
        return prologue + swaps + epilogue;
    }

    default:
        return {};
    }
}

/// Generates the loop inner code for Mode::full_stack: the stack is first filled
/// up to its limit and then emptied down to the loop counter.
/// Returns empty code for instruction categories not supported in this mode.
bytecode generate_full_stack_inner_code(evmc_opcode opcode)
{
    switch (get_instruction_category(opcode))
    {
    case InstructionCategory::nullop:
        // CALLER CALLER ... POP POP ...
        return stack_limit * opcode + stack_limit * OP_POP;

    case InstructionCategory::binop:
        // DUP1 DUP1 DUP1 ... ADD ADD ADD ... POP
        return stack_limit * OP_DUP1 + (stack_limit - 1) * opcode + OP_POP;

    case InstructionCategory::push:
        // PUSH1 PUSH1 PUSH1 ... POP POP POP ...
        return stack_limit * push(opcode, {}) + stack_limit * OP_POP;

    case InstructionCategory::dup:
    {
        // DUPn needs stack height n: the loop counter is duplicated n-1 times with DUP1.
        const auto n = opcode - OP_DUP1 + 1;

        // DUP1 ...  DUPn DUPn ...  POP POP ...
        // \ n-1 /   \ S-(n-1) /    \   S   /
        const auto prologue = (n - 1) * OP_DUP1;
        const auto fill = (stack_limit - (n - 1)) * bytecode{opcode};  // Fills the stack.
        const auto drain = stack_limit * OP_POP;  // Leaves only the loop counter.
        return prologue + fill + drain;
    }

    default:
        return {};
    }
}

/// Generates the EVM benchmark loop inner code for the given opcode and "mode".
///
/// The generated code expects the loop counter on the stack top and leaves it there unchanged.
/// Returns empty code if the instruction category is not supported in the requested mode.
bytecode generate_loop_inner_code(CodeParams params)
{
    switch (params.mode)
    {
    case Mode::min_stack:
        return generate_min_stack_inner_code(params.opcode);
    case Mode::full_stack:
        return generate_full_stack_inner_code(params.opcode);
    }

    return {};
}

/// Wraps the inner code in a benchmark loop (variant 1).
///
/// The result is a do-while loop with 255 iterations: the loop counter starts at 255 and the
/// continuation check is `(counter += -1) != 0`. Adding -1 is used instead of SUB because SUB
/// consumes its arguments in unnatural order and an additional SWAP would be required.
///
/// The loop counter stays on the stack top. The inner code may duplicate it, but must not
/// modify it.
bytecode generate_loop_v1(const bytecode& inner_code)
{
    const auto counter_init = push(255);

    // The JUMPDEST is placed right after the counter initialization.
    const auto loop_start = counter_init.size();

    // counter += (-1)
    const auto decrement =
        push("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") + OP_ADD;

    // Jump back to the loop start if counter != 0.
    const auto jump_back = OP_DUP1 + push(loop_start) + OP_JUMPI;

    return counter_init + OP_JUMPDEST + inner_code + decrement + jump_back;
}

/// Wraps the inner code in a benchmark loop (variant 2).
///
/// An improved variant of v1: it has exactly the same instructions and consumes the same
/// amount of gas, but according to the performed benchmarks (see "loop_v1" and "loop_v2")
/// it runs faster, and the lowest possible loop overhead is wanted.
/// The difference is that the loop counter starts at -255 and the continuation check is
/// `(counter += 1) != 0`.
bytecode generate_loop_v2(const bytecode& inner_code)
{
    // -255
    const auto counter_init =
        push("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff01");

    // The JUMPDEST is placed right after the counter initialization.
    const auto loop_start = counter_init.size();

    // counter += 1
    const auto increment = push(1) + OP_ADD;

    // Jump back to the loop start if counter != 0.
    const auto jump_back = OP_DUP1 + push(loop_start) + OP_JUMPI;

    return counter_init + OP_JUMPDEST + inner_code + increment + jump_back;
}

/// Returns the complete benchmark code (v2 loop around the inner code) for the given params.
///
/// The code is generated on first request and kept in a function-local cache, so the
/// returned view refers to storage owned by that cache.
bytes_view generate_code(CodeParams params)
{
    static std::map<CodeParams, bytecode> generated;

    // operator[] inserts an empty entry on first use; empty means "not generated yet".
    auto& entry = generated[params];
    if (entry.empty())
        entry = generate_loop_v2(generate_loop_inner_code(params));
    return entry;
}
} // namespace

void register_synthetic_benchmarks()
{
std::vector<CodeParams> params_list;

// Nops & unops.
for (const auto opcode : {OP_JUMPDEST, OP_ISZERO, OP_NOT})
params_list.push_back({opcode, Mode::min_stack});

// Binops.
for (const auto opcode : {OP_ADD, OP_MUL, OP_SUB, OP_SIGNEXTEND, OP_LT, OP_GT, OP_SLT, OP_SGT,
OP_EQ, OP_AND, OP_OR, OP_XOR, OP_BYTE, OP_SHL, OP_SHR, OP_SAR})
params_list.insert(
params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}});

// Nullops.
for (const auto opcode : {OP_ADDRESS, OP_CALLER, OP_CALLVALUE, OP_CALLDATASIZE, OP_CODESIZE,
OP_RETURNDATASIZE, OP_PC, OP_MSIZE, OP_GAS})
params_list.insert(
params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}});

// PUSH.
for (auto opcode = OP_PUSH1; opcode <= OP_PUSH32; opcode = static_cast<evmc_opcode>(opcode + 1))
params_list.insert(
params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}});

// SWAP.
for (auto opcode = OP_SWAP1; opcode <= OP_SWAP16; opcode = static_cast<evmc_opcode>(opcode + 1))
params_list.insert(params_list.end(), {{opcode, Mode::min_stack}});

// DUP.
for (auto opcode = OP_DUP1; opcode <= OP_DUP16; opcode = static_cast<evmc_opcode>(opcode + 1))
params_list.insert(
params_list.end(), {{opcode, Mode::min_stack}, {opcode, Mode::full_stack}});


for (auto& [vm_name, vm] : registered_vms)
{
RegisterBenchmark((std::string{vm_name} + "/execute/synth/loop_v1").c_str(),
[&vm = vm](State& state) { execute(state, vm, generate_loop_v1({})); });
RegisterBenchmark((std::string{vm_name} + "/execute/synth/loop_v2").c_str(),
[&vm = vm](State& state) { execute(state, vm, generate_loop_v2({})); });
}

for (const auto params : params_list)
{
for (auto& [vm_name, vm] : registered_vms)
{
RegisterBenchmark(
(std::string{vm_name} + "/execute/synth/" + to_string(params)).c_str(),
[&vm = vm, params](State& state) { execute(state, vm, generate_code(params)); })
->Unit(kMicrosecond);
}
}
}
} // namespace evmone::test
9 changes: 9 additions & 0 deletions test/bench/synthetic_benchmarks.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// evmone: Fast Ethereum Virtual Machine implementation
// Copyright 2020 The evmone Authors.
// SPDX-License-Identifier: Apache-2.0
#pragma once

namespace evmone::test
{
/// Registers the built-in synthetic EVM instruction benchmarks
/// (named "<vm>/execute/synth/...") with the benchmark library.
///
/// Benchmarks are created for each VM present in `registered_vms` at the time of the call,
/// so the VMs must be registered before this function is invoked.
void register_synthetic_benchmarks();
}

0 comments on commit b9facfa

Please sign in to comment.