Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(avm): Bytecode parsing and proof generation #4191

Merged
merged 5 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "./utils.hpp"

namespace bb::utils {

/**
 * @brief Decode a hexadecimal string into a vector of bytes.
 *
 * The input is consumed two characters at a time and each chunk is converted with
 * strtol in base 16. A string of odd length therefore yields a final byte decoded
 * from its single trailing character. No validation is performed: a chunk that
 * strtol cannot convert decodes to 0.
 *
 * @param hex Hexadecimal string representation.
 * @return Vector of uint8_t values, one per (up to) two input characters.
 */
std::vector<uint8_t> hex_to_bytes(const std::string& hex)
{
    std::vector<uint8_t> bytes;
    // One output byte per pair of characters (rounded up), so allocate once.
    bytes.reserve((hex.length() + 1) / 2);

    // The index uses the string's own size type: a narrower `unsigned int` counter
    // would wrap around before reaching the end of a very long string and the loop
    // would never terminate.
    for (std::string::size_type i = 0; i < hex.length(); i += 2) {
        const std::string byte_string = hex.substr(i, 2);
        bytes.push_back(static_cast<uint8_t>(strtol(byte_string.c_str(), nullptr, 16)));
    }

    return bytes;
}

} // namespace bb::utils
17 changes: 17 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include <cstdint>
#include <string>
#include <vector>

namespace bb::utils {

/**
 * @brief Routine to transform a hex string into a vector of bytes.
 *
 * The string is read two characters at a time; each pair produces one byte.
 *
 * @param hex Hexadecimal string representation.
 * @return Vector of uint8_t values.
 */
std::vector<uint8_t> hex_to_bytes(const std::string& hex);

} // namespace bb::utils
24 changes: 6 additions & 18 deletions barretenberg/cpp/src/barretenberg/crypto/ecdsa/ecdsa.test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "ecdsa.hpp"
#include "barretenberg/common/serialize.hpp"
#include "barretenberg/common/utils.hpp"
#include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp"
#include "barretenberg/ecc/curves/secp256r1/secp256r1.hpp"
#include "barretenberg/serialize/test_helper.hpp"
Expand Down Expand Up @@ -89,23 +90,10 @@ TEST(ecdsa, recover_public_key_secp256r1_sha256)
EXPECT_EQ(recovered_public_key, account.public_key);
}

// Decodes a hexadecimal string into raw bytes, consuming two characters per byte.
std::vector<uint8_t> HexToBytes(const std::string& hex)
{
    std::vector<uint8_t> decoded;

    unsigned int offset = 0;
    while (offset < hex.length()) {
        const std::string pair = hex.substr(offset, 2);
        decoded.push_back(static_cast<uint8_t>(strtol(pair.c_str(), nullptr, 16)));
        offset += 2;
    }

    return decoded;
}

TEST(ecdsa, check_overflowing_r_and_s_are_rejected)
{

std::vector<uint8_t> message_vec = HexToBytes("41414141");
std::vector<uint8_t> message_vec = utils::hex_to_bytes("41414141");

std::string message(message_vec.begin(), message_vec.end());
crypto::ecdsa_signature signature;
Expand Down Expand Up @@ -181,10 +169,10 @@ TEST(ecdsa, verify_signature_secp256r1_sha256_NIST_1)
};

crypto::ecdsa_signature sig{ r, s, 27 };
std::vector<uint8_t> message_vec =
HexToBytes("5905238877c77421f73e43ee3da6f2d9e2ccad5fc942dcec0cbd25482935faaf416983fe165b1a045ee2bcd2e6dca3bdf46"
"c4310a7461f9a37960ca672d3feb5473e253605fb1ddfd28065b53cb5858a8ad28175bf9bd386a5e471ea7a65c17cc934a9"
"d791e91491eb3754d03799790fe2d308d16146d5c9b0d0debd97d79ce8");
std::vector<uint8_t> message_vec = utils::hex_to_bytes(
"5905238877c77421f73e43ee3da6f2d9e2ccad5fc942dcec0cbd25482935faaf416983fe165b1a045ee2bcd2e6dca3bdf46"
"c4310a7461f9a37960ca672d3feb5473e253605fb1ddfd28065b53cb5858a8ad28175bf9bd386a5e471ea7a65c17cc934a9"
"d791e91491eb3754d03799790fe2d308d16146d5c9b0d0debd97d79ce8");
std::string message(message_vec.begin(), message_vec.end());

bool result = crypto::ecdsa_verify_signature<Sha256Hasher, secp256r1::fq, secp256r1::fr, secp256r1::g1>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "barretenberg/proof_system/circuit_builder/circuit_builder_base.hpp"
#include "barretenberg/proof_system/circuit_builder/generated/AvmMini_circuit_builder.hpp"
#include <cstdint>

using Flavor = bb::honk::flavor::AvmMiniFlavor;
using FF = Flavor::FF;
Expand All @@ -12,6 +13,9 @@ namespace avm_trace {
// Number of rows
static const size_t AVM_TRACE_SIZE = 256;
enum class IntermRegister : uint32_t { IA = 0, IB = 1, IC = 2 };

// Keep the following enum in sync with MAX_MEM_TAG below: MAX_MEM_TAG must equal
// the largest enumerator value (currently FF = 6).
enum class AvmMemoryTag : uint32_t { U0 = 0, U8 = 1, U16 = 2, U32 = 3, U64 = 4, U128 = 5, FF = 6 };
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could go in the enum itself as MAX, then you can just check for < AvmMemoryTag.MAX, this will ensure it remains up to date

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then, MAX would be a valid value for the enum which is not what I want.

Adding a method in an enum class in cpp is also not possible unfortunately.
Do you see a better way?

// Largest valid underlying value of AvmMemoryTag (AvmMemoryTag::FF). Keep in sync with the enum.
static const uint32_t MAX_MEM_TAG = 6;

} // namespace avm_trace
230 changes: 230 additions & 0 deletions barretenberg/cpp/src/barretenberg/vm/avm_trace/AvmMini_execution.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
#include "AvmMini_execution.hpp"
#include "barretenberg/common/serialize.hpp"
#include "barretenberg/proof_system/circuit_builder/generated/AvmMini_circuit_builder.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_common.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_instructions.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_opcode.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_trace.hpp"
#include "barretenberg/vm/generated/AvmMini_composer.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

namespace avm_trace {

/**
 * @brief Execute the supplied bytecode end to end: parse it, build the matching
 *        execution trace and produce a proof that the execution is correct.
 *
 * @param bytecode The bytecode to execute, as a vector of bytes.
 * @param calldata The calldata, as a vector of finite field elements.
 * @throws runtime_error exception when the bytecode is invalid.
 * @return A zk proof of the execution.
 */
plonk::proof Execution::run_and_prove(std::vector<uint8_t> const& bytecode, std::vector<FF> const& calldata)
{
    // Bytecode -> instructions -> execution trace.
    auto const parsed_instructions = parse(bytecode);
    auto execution_trace = gen_trace(parsed_instructions, calldata);

    // Hand the trace over to the circuit builder.
    auto builder = bb::AvmMiniCircuitBuilder();
    builder.set_trace(std::move(execution_trace));

    // Build a prover for that circuit and generate the proof.
    auto avm_composer = bb::honk::AvmMiniComposer();
    auto avm_prover = avm_composer.create_prover(builder);
    return avm_prover.construct_proof();
}

/**
* @brief Parsing of the supplied bytecode into a vector of instructions. It essentially
* checks that each opcode value is in the defined range and extracts the operands
* for each opcode.
*
* @param bytecode The bytecode to be parsed as a vector of bytes/uint8_t
* @throws runtime_error exception when the bytecode is invalid.
* @return Vector of instructions
*/
std::vector<Instruction> Execution::parse(std::vector<uint8_t> const& bytecode)
{
std::vector<Instruction> instructions;
size_t pos = 0;
const auto length = bytecode.size();

while (pos < length) {
const uint8_t opcode_byte = bytecode.at(pos);
pos += AVM_OPCODE_BYTE_LENGTH;

if (!Bytecode::is_valid(opcode_byte)) {
throw std::runtime_error("Invalid opcode byte: " + std::to_string(opcode_byte));
}

const auto opcode = static_cast<OpCode>(opcode_byte);
auto in_tag_u8 = static_cast<uint8_t>(AvmMemoryTag::U0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whats the casting for, is u8 = 0 not enough?
question asked as a cpp noob

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal here is to keep in sync with the value defined in the enum. Sure, normally the enum value for U0 should always be 0, but if we decide to change the above code will become invalid.

Even though the enum is defined over uint8_t, the static cast is required otherwise compiler screams.


if (Bytecode::has_in_tag(opcode)) {
if (pos + AVM_IN_TAG_BYTE_LENGTH > length) {
throw std::runtime_error("Instruction tag missing at position " + std::to_string(pos));
}
in_tag_u8 = bytecode.at(pos);
if (in_tag_u8 == static_cast<uint8_t>(AvmMemoryTag::U0) || in_tag_u8 > MAX_MEM_TAG) {
throw std::runtime_error("Instruction tag is invalid at position " + std::to_string(pos) +
" value: " + std::to_string(in_tag_u8));
}
pos += AVM_IN_TAG_BYTE_LENGTH;
}

auto const in_tag = static_cast<AvmMemoryTag>(in_tag_u8);
std::vector<uint32_t> operands{};
size_t num_of_operands{};
size_t operands_size{};

// SET opcode particularity about the number of operands depending on the
// instruction tag. Namely, a constant of type instruction tag and not a
// memory address is passed in the operands.
// The bytecode of the operands is of the form CONSTANT || dst_offset
// CONSTANT is of size k bits for type Uk, k=8,16,32,64,128
// dst_offset is of size 32 bits
// CONSTANT has to be decomposed into 32-bit chunks
if (opcode == OpCode::SET) {
switch (in_tag) {
case AvmMemoryTag::U8:
num_of_operands = 2;
operands_size = 5;
break;
case AvmMemoryTag::U16:
num_of_operands = 2;
operands_size = 6;
break;
case AvmMemoryTag::U32:
num_of_operands = 2;
operands_size = 8;
break;
case AvmMemoryTag::U64:
num_of_operands = 3;
operands_size = 12;
break;
case AvmMemoryTag::U128:
num_of_operands = 5;
operands_size = 20;
break;
default:
throw std::runtime_error("Instruction tag for SET opcode is invalid at position " +
std::to_string(pos) + " value: " + std::to_string(in_tag_u8));
break;
}
} else {
num_of_operands = Bytecode::OPERANDS_NUM.at(opcode);
operands_size = AVM_OPERAND_BYTE_LENGTH * num_of_operands;
}

if (pos + operands_size > length) {
throw std::runtime_error("Operand is missing at position " + std::to_string(pos));
}

// We handle operands which are encoded with less than 4 bytes.
// This occurs for opcode SET and tag U8 and U16.
if (opcode == OpCode::SET && in_tag == AvmMemoryTag::U8) {
operands.push_back(static_cast<uint32_t>(bytecode.at(pos)));
pos++;
num_of_operands--;
} else if (opcode == OpCode::SET && in_tag == AvmMemoryTag::U16) {
uint8_t const* ptr = &bytecode.at(pos);
uint16_t operand{};
serialize::read(ptr, operand);
operands.push_back(static_cast<uint32_t>(operand));
pos += 2;
num_of_operands--;
}

// Operands of size of 32 bits.
for (size_t i = 0; i < num_of_operands; i++) {
uint8_t const* ptr = &bytecode.at(pos);
uint32_t operand{};
serialize::read(ptr, operand);
operands.push_back(operand);
pos += AVM_OPERAND_BYTE_LENGTH;
}

instructions.emplace_back(opcode, operands, static_cast<AvmMemoryTag>(in_tag));
}

return instructions;
}

/**
* @brief Generate the execution trace pertaining to the supplied instructions.
*
* @param instructions A vector of the instructions to be executed.
* @param calldata expressed as a vector of finite field elements.
* @return The trace as a vector of Row.
*/
std::vector<Row> Execution::gen_trace(std::vector<Instruction> const& instructions, std::vector<FF> const& calldata)
{
AvmMiniTraceBuilder trace_builder{};

for (auto const& inst : instructions) {
switch (inst.op_code) {
case OpCode::ADD:
trace_builder.add(inst.operands.at(0), inst.operands.at(1), inst.operands.at(2), inst.in_tag);
break;
case OpCode::SUB:
trace_builder.sub(inst.operands.at(0), inst.operands.at(1), inst.operands.at(2), inst.in_tag);
break;
case OpCode::MUL:
trace_builder.mul(inst.operands.at(0), inst.operands.at(1), inst.operands.at(2), inst.in_tag);
break;
case OpCode::DIV:
trace_builder.div(inst.operands.at(0), inst.operands.at(1), inst.operands.at(2), inst.in_tag);
break;
Comment on lines +165 to +177
Copy link
Collaborator

@dbanks12 dbanks12 Jan 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably best to think about how we've implemented the TS instruction classes and the TS "interpreter" and keep track of our decisions to do things the same/differently here.

I'm perfectly fine with the answer "let's just make it a switch-case to keep it simple for now", or "this way of representing instructions and this switch-case may be more efficient than the more object-oriented/functional way it's handled in TS." But it'd be good to get it written in comments or elsewhere just so in the future it's easier to decide to change it or leave it alone.

case OpCode::CALLDATACOPY:
trace_builder.calldata_copy(inst.operands.at(0), inst.operands.at(1), inst.operands.at(2), calldata);
break;
case OpCode::JUMP:
trace_builder.jump(inst.operands.at(0));
break;
case OpCode::INTERNALCALL:
trace_builder.internal_call(inst.operands.at(0));
break;
case OpCode::INTERNALRETURN:
trace_builder.internal_return();
break;
case OpCode::SET: {
uint32_t dst_offset{};
uint128_t val{};
switch (inst.in_tag) {
case AvmMemoryTag::U8:
case AvmMemoryTag::U16:
case AvmMemoryTag::U32:
// U8, U16, U32 value represented in a single uint32_t operand
val = inst.operands.at(0);
dst_offset = inst.operands.at(1);
break;
case AvmMemoryTag::U64: // value represented as 2 uint32_t operands
val = inst.operands.at(0);
val <<= 32;
val += inst.operands.at(1);
dst_offset = inst.operands.at(2);
break;
case AvmMemoryTag::U128: // value represented as 4 uint32_t operands
for (size_t i = 0; i < 4; i++) {
val += inst.operands.at(i);
val <<= 32;
}
dst_offset = inst.operands.at(4);
break;
default:
break;
}
trace_builder.set(val, dst_offset, inst.in_tag);
break;
}
case OpCode::RETURN:
trace_builder.return_op(inst.operands.at(0), inst.operands.at(1));
break;
default:
break;
}
}
return trace_builder.finalize();
}

} // namespace avm_trace
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include "barretenberg/plonk/proof_system/types/proof.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_common.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_instructions.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_trace.hpp"
#include <cstddef>
#include <cstdint>
#include <vector>

namespace avm_trace {

/**
 * @brief Static entry points to parse AVM bytecode into instructions, generate the
 *        execution trace of those instructions, and prove the execution.
 */
class Execution {
  public:
    Execution() = default;

    // The byte-length constants are `static constexpr` (implicitly inline in C++17) so
    // that odr-using them, e.g. binding one to a const reference, cannot cause a link
    // error for lack of an out-of-class definition.

    // Encoded size in bytes of a regular (32-bit) operand.
    static constexpr size_t AVM_OPERAND_BYTE_LENGTH = 4; // Keep in sync with TS code
    static_assert(sizeof(uint32_t) / sizeof(uint8_t) == AVM_OPERAND_BYTE_LENGTH);

    // Encoded size in bytes of the opcode and of the instruction tag.
    static constexpr size_t AVM_OPCODE_BYTE_LENGTH = 1; // Keep in sync with TS code
    static constexpr size_t AVM_IN_TAG_BYTE_LENGTH = 1; // Keep in sync with TS code

    // Parse raw bytecode into a vector of instructions.
    static std::vector<Instruction> parse(std::vector<uint8_t> const& bytecode);
    // Generate the execution trace for the given instructions and calldata.
    static std::vector<Row> gen_trace(std::vector<Instruction> const& instructions, std::vector<FF> const& calldata);
    // Parse the bytecode, generate its trace and return a proof of the execution.
    static plonk::proof run_and_prove(std::vector<uint8_t> const& bytecode, std::vector<FF> const& calldata);
};

} // namespace avm_trace
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#pragma once

#include "barretenberg/vm/avm_trace/AvmMini_common.hpp"
#include "barretenberg/vm/avm_trace/AvmMini_opcode.hpp"
#include <cstdint>
#include <vector>

namespace avm_trace {

class Instruction {
public:
OpCode op_code;
std::vector<uint32_t> operands;
AvmMemoryTag in_tag;

Instruction() = delete;
explicit Instruction(OpCode op_code, std::vector<uint32_t> operands, AvmMemoryTag in_tag)
: op_code(op_code)
, operands(std::move(operands))
, in_tag(in_tag){};
};

} // namespace avm_trace
Loading
Loading