Skip to content

Commit

Permalink
feat(avm): generic bytecode deserialization (#4441)
Browse files Browse the repository at this point in the history
Resolves #4304
  • Loading branch information
jeanmon authored Feb 6, 2024
1 parent e67d94b commit 934fabc
Show file tree
Hide file tree
Showing 9 changed files with 376 additions and 237 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ std::vector<AvmMiniAluTraceBuilder::AluTraceEntry> AvmMiniAluTraceBuilder::final
*/
FF AvmMiniAluTraceBuilder::add(FF const& a, FF const& b, AvmMemoryTag in_tag, uint32_t const clk)
{
FF c{};
FF c = 0;
bool carry = false;
uint8_t alu_u8_r0{};
uint8_t alu_u8_r1{};
std::array<uint16_t, 8> alu_u16_reg{};
uint8_t alu_u8_r0 = 0;
uint8_t alu_u8_r1 = 0;
std::array<uint16_t, 8> alu_u16_reg{}; // Must be zero-initialized (FF tag case)

uint128_t a_u128{ a };
uint128_t b_u128{ b };
Expand Down Expand Up @@ -136,11 +136,11 @@ FF AvmMiniAluTraceBuilder::add(FF const& a, FF const& b, AvmMemoryTag in_tag, ui
*/
FF AvmMiniAluTraceBuilder::sub(FF const& a, FF const& b, AvmMemoryTag in_tag, uint32_t const clk)
{
FF c{};
FF c = 0;
bool carry = false;
uint8_t alu_u8_r0{};
uint8_t alu_u8_r1{};
std::array<uint16_t, 8> alu_u16_reg{};
uint8_t alu_u8_r0 = 0;
uint8_t alu_u8_r1 = 0;
std::array<uint16_t, 8> alu_u16_reg{}; // Must be zero-initialized (FF tag case)
uint128_t a_u128{ a };
uint128_t b_u128{ b };
uint128_t c_u128 = a_u128 - b_u128;
Expand Down Expand Up @@ -220,12 +220,12 @@ FF AvmMiniAluTraceBuilder::sub(FF const& a, FF const& b, AvmMemoryTag in_tag, ui
*/
FF AvmMiniAluTraceBuilder::mul(FF const& a, FF const& b, AvmMemoryTag in_tag, uint32_t const clk)
{
FF c{};
FF c = 0;
bool carry = false;
uint8_t alu_u8_r0{};
uint8_t alu_u8_r1{};
uint8_t alu_u8_r0 = 0;
uint8_t alu_u8_r1 = 0;

std::array<uint16_t, 8> alu_u16_reg{};
std::array<uint16_t, 8> alu_u16_reg{}; // Must be zero-initialized (FF tag case)

uint128_t a_u128{ a };
uint128_t b_u128{ b };
Expand Down Expand Up @@ -258,8 +258,8 @@ FF AvmMiniAluTraceBuilder::mul(FF const& a, FF const& b, AvmMemoryTag in_tag, ui
uint128_t c_u128 = a_u128 * b_u128;

// Decompose a_u128 and b_u128 over 8 16-bit registers.
std::array<uint16_t, 8> alu_u16_reg_a{};
std::array<uint16_t, 8> alu_u16_reg_b{};
std::array<uint16_t, 8> alu_u16_reg_a; // Will be initialized in for loop below.
std::array<uint16_t, 8> alu_u16_reg_b; // Will be initialized in for loop below.
uint128_t a_trunc_128 = a_u128;
uint128_t b_trunc_128 = b_u128;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#include "AvmMini_deserialization.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_common.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_instructions.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_opcode.hpp"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

namespace avm_trace {

namespace {

// Operand layout shared by the arithmetic opcodes registered below (ADD, SUB, MUL, DIV):
// one memory tag followed by three 32-bit operands.
const std::vector<OperandType> three_operand_format = {
OperandType::TAG,
OperandType::UINT32,
OperandType::UINT32,
OperandType::UINT32,
};

// Maps each supported opcode to the ordered list of operand types that follow its opcode byte.
// Contrary to TS, the format does not contain the opcode byte which prefixes any instruction.
// The format for OpCode::SET has to be handled separately as it is variable based on the tag.
// An opcode without an entry here (other than SET) cannot be deserialized by Deserialization::parse.
const std::unordered_map<OpCode, std::vector<OperandType>> OPCODE_WIRE_FORMAT = {
// Compute
// Compute - Arithmetic
{ OpCode::ADD, three_operand_format },
{ OpCode::SUB, three_operand_format },
{ OpCode::MUL, three_operand_format },
{ OpCode::DIV, three_operand_format },
// Execution Environment - Calldata
{ OpCode::CALLDATACOPY, { OperandType::UINT32, OperandType::UINT32, OperandType::UINT32 } },
// Machine State - Internal Control Flow
{ OpCode::JUMP, { OperandType::UINT32 } },
{ OpCode::INTERNALCALL, { OperandType::UINT32 } },
{ OpCode::INTERNALRETURN, {} }, // No operand: the instruction is the opcode byte alone.
// Machine State - Memory
// OpCode::SET is handled differently
// Control Flow - Contract Calls
{ OpCode::RETURN, { OperandType::UINT32, OperandType::UINT32 } },
};

// Number of bytecode bytes occupied by an operand of each type. The parser uses these sizes both
// to check that enough bytes remain and to advance its read position.
const std::unordered_map<OperandType, size_t> OPERAND_TYPE_SIZE = {
{ OperandType::TAG, 1 }, { OperandType::UINT8, 1 }, { OperandType::UINT16, 2 },
{ OperandType::UINT32, 4 }, { OperandType::UINT64, 8 }, { OperandType::UINT128, 16 },
};

} // Anonymous namespace

/**
* @brief Parsing of the supplied bytecode into a vector of instructions. It essentially
* checks that each opcode value is in the defined range and extracts the operands
* for each opcode based on the specification from OPCODE_WIRE_FORMAT.
*
* @param bytecode The bytecode to be parsed as a vector of bytes/uint8_t
* @throws runtime_error exception when the bytecode is invalid.
* @return Vector of instructions
*/
std::vector<Instruction> Deserialization::parse(std::vector<uint8_t> const& bytecode)
{
std::vector<Instruction> instructions;
size_t pos = 0;
const auto length = bytecode.size();

while (pos < length) {
const uint8_t opcode_byte = bytecode.at(pos);

if (!Bytecode::is_valid(opcode_byte)) {
throw_or_abort("Invalid opcode byte: " + std::to_string(opcode_byte) +
" at position: " + std::to_string(pos));
}
pos++;

auto const opcode = static_cast<OpCode>(opcode_byte);
std::vector<OperandType> inst_format;

if (opcode == OpCode::SET) {
if (pos == length) {
throw_or_abort("Operand for SET opcode is missing at position " + std::to_string(pos));
}

std::set<uint8_t> const valid_tags = { static_cast<uint8_t>(AvmMemoryTag::U8),
static_cast<uint8_t>(AvmMemoryTag::U16),
static_cast<uint8_t>(AvmMemoryTag::U32),
static_cast<uint8_t>(AvmMemoryTag::U64),
static_cast<uint8_t>(AvmMemoryTag::U128) };
uint8_t set_tag_u8 = bytecode.at(pos);

if (!valid_tags.contains(set_tag_u8)) {
throw_or_abort("Instruction tag for SET opcode is invalid at position " + std::to_string(pos) +
" value: " + std::to_string(set_tag_u8));
}

auto in_tag = static_cast<AvmMemoryTag>(set_tag_u8);
switch (in_tag) {
case AvmMemoryTag::U8:
inst_format = { OperandType::TAG, OperandType::UINT8, OperandType::UINT32 };
break;
case AvmMemoryTag::U16:
inst_format = { OperandType::TAG, OperandType::UINT16, OperandType::UINT32 };
break;
case AvmMemoryTag::U32:
inst_format = { OperandType::TAG, OperandType::UINT32, OperandType::UINT32 };
break;
case AvmMemoryTag::U64:
inst_format = { OperandType::TAG, OperandType::UINT64, OperandType::UINT32 };
break;
case AvmMemoryTag::U128:
inst_format = { OperandType::TAG, OperandType::UINT128, OperandType::UINT32 };
break;
default: // This branch is guarded above.
std::cerr << "This code branch must have been guarded by the tag validation. \n";
assert(false);
}
} else {
inst_format = OPCODE_WIRE_FORMAT.at(opcode);
}

std::vector<Operand> operands;

for (OperandType const& opType : inst_format) {
// No underflow as while condition guarantees pos <= length (after pos++)
if (length - pos < OPERAND_TYPE_SIZE.at(opType)) {
throw_or_abort("Operand is missing at position " + std::to_string(pos));
}

switch (opType) {
case OperandType::TAG: {
uint8_t tag_u8 = bytecode.at(pos);
if (tag_u8 == static_cast<uint8_t>(AvmMemoryTag::U0) || tag_u8 > MAX_MEM_TAG) {
throw_or_abort("Instruction tag is invalid at position " + std::to_string(pos) +
" value: " + std::to_string(tag_u8));
}
operands.emplace_back(static_cast<AvmMemoryTag>(tag_u8));
break;
}
case OperandType::UINT8:
operands.emplace_back(bytecode.at(pos));
break;
case OperandType::UINT16: {
uint16_t operand_u16 = 0;
uint8_t const* pos_ptr = &bytecode.at(pos);
serialize::read(pos_ptr, operand_u16);
operands.emplace_back(operand_u16);
break;
}
case OperandType::UINT32: {
uint32_t operand_u32 = 0;
uint8_t const* pos_ptr = &bytecode.at(pos);
serialize::read(pos_ptr, operand_u32);
operands.emplace_back(operand_u32);
break;
}
case OperandType::UINT64: {
uint64_t operand_u64 = 0;
uint8_t const* pos_ptr = &bytecode.at(pos);
serialize::read(pos_ptr, operand_u64);
operands.emplace_back(operand_u64);
break;
}
case OperandType::UINT128: {
uint128_t operand_u128 = 0;
uint8_t const* pos_ptr = &bytecode.at(pos);
serialize::read(pos_ptr, operand_u128);
operands.emplace_back(operand_u128);
break;
}
}
pos += OPERAND_TYPE_SIZE.at(opType);
}
instructions.emplace_back(opcode, operands);
}
return instructions;
};
} // namespace avm_trace
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#pragma once

#include "barretenberg/numeric/uint128/uint128.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_common.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_instructions.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_opcode.hpp"
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <variant>
#include <vector>

namespace avm_trace {

// Possible types for an instruction's operand in its wire format. (Keep in sync with TS code.
// See avm/serialization/instruction_serialization.ts).
// Note that the TAG enum value is not supported in TS and is parsed as UINT8.
// The UINT<N> enumerators denote unsigned operands of N bits.
enum class OperandType : uint8_t { TAG, UINT8, UINT16, UINT32, UINT64, UINT128 };

/**
 * @brief Groups the routines that turn raw bytecode into Instruction objects.
 *        The class holds no data members; its functionality is exposed as a static method.
 */
class Deserialization {
public:
Deserialization() = default;

/**
 * @brief Parses a byte sequence into the list of instructions it encodes.
 *
 * @param bytecode The bytecode to be parsed, as a vector of bytes.
 * @return The decoded instructions, in the order they appear in the bytecode.
 */
static std::vector<Instruction> parse(std::vector<uint8_t> const& bytecode);
};

} // namespace avm_trace
Loading

0 comments on commit 934fabc

Please sign in to comment.