Skip to content

Commit

Permalink
feat(avm): bytecode manager changes (#11347)
Browse files Browse the repository at this point in the history
* We don't need the bytecode hash when simulating, since tracegen should
recompute it anyway. This should save ~25ms per bytecode.
* Use `bytecode_id` instead of `class_id`.
* Add bytecode retrieval events.
  • Loading branch information
fcarreiro authored Jan 21, 2025
1 parent 22613df commit 4a9c072
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 83 deletions.
1 change: 1 addition & 0 deletions barretenberg/cpp/pil/vm2/execution.pil
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pol commit rop3;
pol commit rop4;

pol commit pc;
pol commit bytecode_id;
pol commit clk;
pol commit last;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace bb::avm2 {
// The entities that will be used in the flavor.
// clang-format off
#define AVM2_PRECOMPUTED_ENTITIES precomputed_bitwise_input_a, precomputed_bitwise_input_b, precomputed_bitwise_op_id, precomputed_bitwise_output, precomputed_clk, precomputed_first_row, precomputed_sel_bitwise
#define AVM2_WIRE_ENTITIES execution_input, alu_dst_addr, alu_ia, alu_ia_addr, alu_ib, alu_ib_addr, alu_ic, alu_op, alu_sel_op_add, execution_addressing_error_idx, execution_addressing_error_kind, execution_base_address_tag, execution_base_address_val, execution_clk, execution_ex_opcode, execution_indirect, execution_last, execution_op1, execution_op1_after_relative, execution_op2, execution_op2_after_relative, execution_op3, execution_op3_after_relative, execution_op4, execution_op4_after_relative, execution_pc, execution_rop1, execution_rop2, execution_rop3, execution_rop4, execution_sel, execution_sel_addressing_error, execution_sel_op1_is_address, execution_sel_op2_is_address, execution_sel_op3_is_address, execution_sel_op4_is_address, lookup_dummy_precomputed_counts, lookup_dummy_dynamic_counts
#define AVM2_WIRE_ENTITIES execution_input, alu_dst_addr, alu_ia, alu_ia_addr, alu_ib, alu_ib_addr, alu_ic, alu_op, alu_sel_op_add, execution_addressing_error_idx, execution_addressing_error_kind, execution_base_address_tag, execution_base_address_val, execution_bytecode_id, execution_clk, execution_ex_opcode, execution_indirect, execution_last, execution_op1, execution_op1_after_relative, execution_op2, execution_op2_after_relative, execution_op3, execution_op3_after_relative, execution_op4, execution_op4_after_relative, execution_pc, execution_rop1, execution_rop2, execution_rop3, execution_rop4, execution_sel, execution_sel_addressing_error, execution_sel_op1_is_address, execution_sel_op2_is_address, execution_sel_op3_is_address, execution_sel_op4_is_address, lookup_dummy_precomputed_counts, lookup_dummy_dynamic_counts
#define AVM2_DERIVED_WITNESS_ENTITIES perm_dummy_dynamic_inv, lookup_dummy_precomputed_inv, lookup_dummy_dynamic_inv
#define AVM2_SHIFTED_ENTITIES execution_sel_shift
#define AVM2_TO_BE_SHIFTED(e) e.execution_sel
Expand Down
61 changes: 32 additions & 29 deletions barretenberg/cpp/src/barretenberg/vm2/generated/flavor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,36 @@ AvmFlavor::AllConstRefValues::AllConstRefValues(
, execution_addressing_error_kind(il[17])
, execution_base_address_tag(il[18])
, execution_base_address_val(il[19])
, execution_clk(il[20])
, execution_ex_opcode(il[21])
, execution_indirect(il[22])
, execution_last(il[23])
, execution_op1(il[24])
, execution_op1_after_relative(il[25])
, execution_op2(il[26])
, execution_op2_after_relative(il[27])
, execution_op3(il[28])
, execution_op3_after_relative(il[29])
, execution_op4(il[30])
, execution_op4_after_relative(il[31])
, execution_pc(il[32])
, execution_rop1(il[33])
, execution_rop2(il[34])
, execution_rop3(il[35])
, execution_rop4(il[36])
, execution_sel(il[37])
, execution_sel_addressing_error(il[38])
, execution_sel_op1_is_address(il[39])
, execution_sel_op2_is_address(il[40])
, execution_sel_op3_is_address(il[41])
, execution_sel_op4_is_address(il[42])
, lookup_dummy_precomputed_counts(il[43])
, lookup_dummy_dynamic_counts(il[44])
, perm_dummy_dynamic_inv(il[45])
, lookup_dummy_precomputed_inv(il[46])
, lookup_dummy_dynamic_inv(il[47])
, execution_sel_shift(il[48])
, execution_bytecode_id(il[20])
, execution_clk(il[21])
, execution_ex_opcode(il[22])
, execution_indirect(il[23])
, execution_last(il[24])
, execution_op1(il[25])
, execution_op1_after_relative(il[26])
, execution_op2(il[27])
, execution_op2_after_relative(il[28])
, execution_op3(il[29])
, execution_op3_after_relative(il[30])
, execution_op4(il[31])
, execution_op4_after_relative(il[32])
, execution_pc(il[33])
, execution_rop1(il[34])
, execution_rop2(il[35])
, execution_rop3(il[36])
, execution_rop4(il[37])
, execution_sel(il[38])
, execution_sel_addressing_error(il[39])
, execution_sel_op1_is_address(il[40])
, execution_sel_op2_is_address(il[41])
, execution_sel_op3_is_address(il[42])
, execution_sel_op4_is_address(il[43])
, lookup_dummy_precomputed_counts(il[44])
, lookup_dummy_dynamic_counts(il[45])
, perm_dummy_dynamic_inv(il[46])
, lookup_dummy_precomputed_inv(il[47])
, lookup_dummy_dynamic_inv(il[48])
, execution_sel_shift(il[49])
{}

AvmFlavor::ProverPolynomials::ProverPolynomials(ProvingKey& proving_key)
Expand Down Expand Up @@ -90,6 +91,7 @@ AvmFlavor::AllConstRefValues AvmFlavor::ProverPolynomials::get_row(size_t row_id
execution_addressing_error_kind[row_idx],
execution_base_address_tag[row_idx],
execution_base_address_val[row_idx],
execution_bytecode_id[row_idx],
execution_clk[row_idx],
execution_ex_opcode[row_idx],
execution_indirect[row_idx],
Expand Down Expand Up @@ -143,6 +145,7 @@ AvmFlavor::CommitmentLabels::CommitmentLabels()
Base::execution_addressing_error_kind = "EXECUTION_ADDRESSING_ERROR_KIND";
Base::execution_base_address_tag = "EXECUTION_BASE_ADDRESS_TAG";
Base::execution_base_address_val = "EXECUTION_BASE_ADDRESS_VAL";
Base::execution_bytecode_id = "EXECUTION_BYTECODE_ID";
Base::execution_clk = "EXECUTION_CLK";
Base::execution_ex_opcode = "EXECUTION_EX_OPCODE";
Base::execution_indirect = "EXECUTION_INDIRECT";
Expand Down
4 changes: 2 additions & 2 deletions barretenberg/cpp/src/barretenberg/vm2/generated/flavor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ class AvmFlavor {
static constexpr bool HasZK = false;

static constexpr size_t NUM_PRECOMPUTED_ENTITIES = 7;
static constexpr size_t NUM_WITNESS_ENTITIES = 41;
static constexpr size_t NUM_WITNESS_ENTITIES = 42;
static constexpr size_t NUM_SHIFTED_ENTITIES = 1;
static constexpr size_t NUM_WIRES = NUM_WITNESS_ENTITIES + NUM_PRECOMPUTED_ENTITIES;
// We have two copies of the witness entities, so we subtract the number of fixed ones (they have no shift), one for
// the unshifted and one for the shifted
static constexpr size_t NUM_ALL_ENTITIES = 49;
static constexpr size_t NUM_ALL_ENTITIES = 50;
// The total number of witnesses including shifts and derived entities.
static constexpr size_t NUM_ALL_WITNESS_ENTITIES = NUM_WITNESS_ENTITIES + NUM_SHIFTED_ENTITIES;

Expand Down
2 changes: 2 additions & 0 deletions barretenberg/cpp/src/barretenberg/vm2/generated/full_row.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ template <typename FF> std::vector<std::string> AvmFullRow<FF>::names()
"execution_addressing_error_kind",
"execution_base_address_tag",
"execution_base_address_val",
"execution_bytecode_id",
"execution_clk",
"execution_ex_opcode",
"execution_indirect",
Expand Down Expand Up @@ -92,6 +93,7 @@ template <typename FF> RefVector<const FF> AvmFullRow<FF>::as_vector() const
execution_addressing_error_kind,
execution_base_address_tag,
execution_base_address_val,
execution_bytecode_id,
execution_clk,
execution_ex_opcode,
execution_indirect,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ template <typename FF_> struct AvmFullRow {

RefVector<const FF> as_vector() const;
static std::vector<std::string> names();
static constexpr size_t SIZE = 48;
static constexpr size_t SIZE = 49;

// Risky but oh so efficient.
FF& get_column(ColumnAndShifts col)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,32 @@ namespace bb::avm2::simulation {

BytecodeId TxBytecodeManager::get_bytecode(const AztecAddress& address)
{
// TODO: in principle we want to do this, but we can't make hints fail. Think about it.
// auto it = resolved_addresses.find(address);
// if (it != resolved_addresses.end()) {
// return it->second;
// }
auto it = resolved_addresses.find(address);
if (it != resolved_addresses.end()) {
return it->second;
}

// TODO: catch errors etc.
// TODO: we should trigger the proper merkle checks etc. The raw DB doesn't.
ContractInstance instance = db.get_contract_instance(address);
ContractClass klass = db.get_contract_class(instance.contract_class_id);
FF hash = compute_public_bytecode_commitment(klass.packed_bytecode);
auto bytecode_id = next_bytecode_id++;
info("Bytecode for ", address, " successfully retrieved!");

// We convert the bytecode to a shared_ptr because it will be shared by some events.
auto shared_bytecode = std::make_shared<std::vector<uint8_t>>(std::move(klass.packed_bytecode));
hash_events.emit({ .class_id = instance.contract_class_id, .bytecode = shared_bytecode, .hash = hash });
hash_events.emit({ .bytecode_id = bytecode_id, .bytecode = shared_bytecode });

// We now save the bytecode so that we don't repeat this process.
auto bytecode_id = next_bytecode_id++;
resolved_addresses[address] = bytecode_id;
bytecodes.emplace(bytecode_id, BytecodeInfo{ .bytecode = shared_bytecode, .class_id = instance.contract_class_id });
bytecodes.emplace(bytecode_id, std::move(shared_bytecode));
retrieval_events.emit({
.bytecode_id = bytecode_id,
.address = address,
.siloed_address = address, // FIXME: compute, check.
.contract_instance = instance,
.contract_class = klass // WARNING: this class has the whole bytecode.
});

return bytecode_id;
}
Expand All @@ -41,23 +46,13 @@ Instruction TxBytecodeManager::read_instruction(BytecodeId bytecode_id, uint32_t
throw std::runtime_error("Bytecode not found");
}

const auto& bytecode = *it->second.bytecode;
const auto& bytecode = *it->second;
// TODO: catch errors etc.
Instruction instruction = decode_instruction(bytecode, pc);

decomposition_events.emit({ .class_id = it->second.class_id, .pc = pc, .instruction = instruction });
decomposition_events.emit({ .bytecode_id = bytecode_id, .pc = pc, .instruction = instruction });

return instruction;
}

ContractClassId TxBytecodeManager::get_class_id(BytecodeId bytecode_id) const
{
auto it = bytecodes.find(bytecode_id);
if (it == bytecodes.end()) {
throw std::runtime_error("Bytecode not found");
}

return it->second.class_id;
}

} // namespace bb::avm2::simulation
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

namespace bb::avm2::simulation {

using BytecodeId = uint32_t;

// Manages the bytecode operations of all calls in a transaction.
// In particular, it will not duplicate hashing and decomposition.
class TxBytecodeManagerInterface {
Expand All @@ -30,34 +28,29 @@ class TxBytecodeManagerInterface {
virtual BytecodeId get_bytecode(const AztecAddress& address) = 0;
// Retrieves an instruction and decomposes it if needed.
virtual Instruction read_instruction(BytecodeId bytecode_id, uint32_t pc) = 0;
// Retrieves the class id of a bytecode, in case you need it.
virtual ContractClassId get_class_id(BytecodeId bytecode_id) const = 0;
};

class TxBytecodeManager : public TxBytecodeManagerInterface {
public:
TxBytecodeManager(RawDataDBInterface& db,
EventEmitterInterface<BytecodeRetrievalEvent>& retrieval_events,
EventEmitterInterface<BytecodeHashingEvent>& hash_events,
EventEmitterInterface<BytecodeDecompositionEvent>& decomposition_events)
: db(db)
, retrieval_events(retrieval_events)
, hash_events(hash_events)
, decomposition_events(decomposition_events)
{}

BytecodeId get_bytecode(const AztecAddress& address) override;
Instruction read_instruction(BytecodeId bytecode_id, uint32_t pc) override;
ContractClassId get_class_id(BytecodeId bytecode_id) const override;

private:
struct BytecodeInfo {
std::shared_ptr<std::vector<uint8_t>> bytecode;
ContractClassId class_id;
};

RawDataDBInterface& db;
EventEmitterInterface<BytecodeRetrievalEvent>& retrieval_events;
EventEmitterInterface<BytecodeHashingEvent>& hash_events;
EventEmitterInterface<BytecodeDecompositionEvent>& decomposition_events;
unordered_flat_map<BytecodeId, const BytecodeInfo> bytecodes;
unordered_flat_map<BytecodeId, std::shared_ptr<std::vector<uint8_t>>> bytecodes;
unordered_flat_map<AztecAddress, BytecodeId> resolved_addresses;
BytecodeId next_bytecode_id = 0;
};
Expand All @@ -69,7 +62,7 @@ class BytecodeManagerInterface {
virtual ~BytecodeManagerInterface() = default;

virtual Instruction read_instruction(uint32_t pc) const = 0;
virtual ContractClassId get_class_id() const = 0;
virtual BytecodeId get_bytecode_id() const = 0;
};

class BytecodeManager : public BytecodeManagerInterface {
Expand All @@ -83,7 +76,7 @@ class BytecodeManager : public BytecodeManagerInterface {
{
return tx_bytecode_manager.read_instruction(bytecode_id, pc);
}
ContractClassId get_class_id() const override { return tx_bytecode_manager.get_class_id(bytecode_id); }
BytecodeId get_bytecode_id() const override { return bytecode_id; }

private:
BytecodeId bytecode_id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <cstdint>
#include <memory>
#include <optional>
#include <vector>

#include "barretenberg/vm2/common/aztec_types.hpp"
Expand All @@ -12,22 +13,29 @@

namespace bb::avm2::simulation {

// TODO: Implement tracegen for this. This event might need to change. Ideally we'd
// avoid having an event for each iteration of the hashing.
// It really depends on how we want to separate the concerns between simulation and tracegen.
// And whether we want to allow events to explode vertically in tracegen.
using BytecodeId = uint8_t;

// TODO: Implement tracegen for this.
struct BytecodeHashingEvent {
ContractClassId class_id;
BytecodeId bytecode_id;
std::shared_ptr<std::vector<uint8_t>> bytecode;
FF hash;
};

// Records the retrieval of one bytecode by address.
// In the get_bytecode() code shown with this change, one such event is emitted for each
// address that was not already present in the resolved-address cache.
struct BytecodeRetrievalEvent {
// Id assigned to the retrieved bytecode; the hashing and decomposition events carry the same id.
BytecodeId bytecode_id;
// Address whose bytecode was requested.
AztecAddress address;
// NOTE(review): the visible emitter currently stores `address` here unchanged (marked FIXME there).
AztecAddress siloed_address;
// Contract instance returned by the DB lookup for `address` at the emit site.
ContractInstance contract_instance;
// Contract class looked up from the instance's contract_class_id at the emit site.
// NOTE(review): the emitter's comment says this holds the whole bytecode, but the visible code
// std::move's klass.packed_bytecode into a shared_ptr before copying the class here, so the
// copied packed_bytecode is in a moved-from state — confirm which is intended.
ContractClass contract_class;
// Defaults to false; the visible emitter never sets it.
bool error = false;
};

// WARNING: These events and the above will be "linked" by the bytecode column (1 byte per row).
// Therefore, when generating the trace from this event, it will be absolutely necessary
// to know where the first row of the bytecode is. That presents design challenges.
// Question: consider processing in tandem?
struct BytecodeDecompositionEvent {
ContractClassId class_id;
BytecodeId bytecode_id;
uint32_t pc;
// TODO: Do we want to have a dep on Instruction here or do we redefine what we need?
Instruction instruction;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#pragma once

#include <list>

#include "barretenberg/vm2/simulation/events/addressing_event.hpp"
#include "barretenberg/vm2/simulation/events/alu_event.hpp"
#include "barretenberg/vm2/simulation/events/bytecode_events.hpp"
Expand All @@ -16,6 +14,7 @@ struct EventsContainer {
EventEmitterInterface<AluEvent>::Container alu;
EventEmitterInterface<MemoryEvent>::Container memory;
EventEmitterInterface<AddressingEvent>::Container addressing;
EventEmitterInterface<BytecodeRetrievalEvent>::Container bytecode_retrieval;
EventEmitterInterface<BytecodeHashingEvent>::Container bytecode_hashing;
EventEmitterInterface<BytecodeDecompositionEvent>::Container bytecode_decomposition;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
#include "barretenberg/vm2/common/instruction_spec.hpp"
#include "barretenberg/vm2/common/memory_types.hpp"
#include "barretenberg/vm2/common/opcodes.hpp"
#include "barretenberg/vm2/simulation/events/bytecode_events.hpp"
#include "barretenberg/vm2/simulation/lib/serialization.hpp"

namespace bb::avm2::simulation {

struct ExecutionEvent {
uint32_t pc;
ContractClassId contract_class_id;
BytecodeId bytecode_id;
Instruction wire_instruction;
const InstructionSpec& instruction_spec;
ExecutionOpCode opcode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void Execution::execution_loop()
dispatch_opcode(opcode, resolved_operands);

events.emit({ .pc = pc,
.contract_class_id = context.get_bytecode_manager().get_class_id(),
.bytecode_id = context.get_bytecode_manager().get_bytecode_id(),
.wire_instruction = std::move(instruction),
.instruction_spec = spec,
.opcode = opcode,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class MockBytecodeManager : public BytecodeManagerInterface {
~MockBytecodeManager() override;

MOCK_METHOD(Instruction, read_instruction, (uint32_t pc), (const, override));
MOCK_METHOD(ContractClassId, get_class_id, (), (const, override));
MOCK_METHOD(BytecodeId, get_bytecode_id, (), (const, override));
};

} // namespace bb::avm2::simulation
Loading

0 comments on commit 4a9c072

Please sign in to comment.