From 7881f09aa7628ac3adf447a7b696a614dcc47fee Mon Sep 17 00:00:00 2001 From: David Banks <47112877+dbanks12@users.noreply.github.com> Date: Sun, 11 Feb 2024 11:52:35 -0500 Subject: [PATCH] docs(yellowpaper): separate section for AVM state (#4440) --- yellow-paper/docs/public-vm/avm-circuit.md | 80 ++++---- yellow-paper/docs/public-vm/avm.md | 178 +++++------------- .../docs/public-vm/gen/_InstructionSet.mdx | 32 ++-- .../{state-model.md => memory-model.md} | 0 yellow-paper/docs/public-vm/state.md | 95 ++++++++++ yellow-paper/docs/public-vm/type-structs.md | 94 +++++++++ yellow-paper/sidebars.js | 4 +- .../InstructionSet/InstructionSet.js | 8 +- 8 files changed, 305 insertions(+), 186 deletions(-) rename yellow-paper/docs/public-vm/{state-model.md => memory-model.md} (100%) create mode 100644 yellow-paper/docs/public-vm/state.md create mode 100644 yellow-paper/docs/public-vm/type-structs.md diff --git a/yellow-paper/docs/public-vm/avm-circuit.md b/yellow-paper/docs/public-vm/avm-circuit.md index 65234632f13..d4c3c17e653 100644 --- a/yellow-paper/docs/public-vm/avm-circuit.md +++ b/yellow-paper/docs/public-vm/avm-circuit.md @@ -1,7 +1,9 @@ # AVM Circuit The AVM circuit's purpose is to prove execution of a sequence of instructions for a public execution request. Regardless of whether execution succeeds or reverts, the circuit always generates a valid proof of execution. -## Circuit Architecture +## Introduction + +### Circuit Architecture Outline The circuit is comprised of the following components: - **Bytecode Table**: includes bytecode for all calls, indexed by call pointer and program counter. - **Instruction Controller**: fetches an instruction from the Bytecode Table. Decodes the instructions into sub-operations to be forwarded to other modules. @@ -72,7 +74,7 @@ User code (AVM bytecode) has no concept of "registers", and so instructions ofte Three intermediate registers exist: $I_a$, $I_b$, and $I_c$. -> Refer to ["AVM State Model"](./state-model) for more details on the absence of "external registers" in the AVM. +> Refer to ["AVM State Model"](./memory-model) for more details on the absence of "external registers" in the AVM. ## Control Flow Unit Processes updates to the program counter and call pointer to ensure that execution proceeds properly from one instruction to the next. @@ -109,7 +111,7 @@ When decoded, instructions that operate on memory map to some Memory Controller ### User Memory -This table tracks all memory `Read` or `Write` operations. As introduced in the ["Memory State Model"](./state-model.md), a memory cell is indexed by a 32-bit unsigned integer (`u32`), i.e., the memory capacity is of $2^{32}$ words. Each word is associated with a tag defining its type (`uninitialized`, `u8`, `u16`, `u32`, `u64`, `u128`, `field`). At the beginning of a new call, each memory cell is of type `uninitialized` and has value 0. +This table tracks all memory `Read` or `Write` operations. As introduced in the ["Memory State Model"](./memory-model.md), a memory cell is indexed by a 32-bit unsigned integer (`u32`), i.e., the memory capacity is of $2^{32}$ words. Each word is associated with a tag defining its type (`uninitialized`, `u8`, `u16`, `u32`, `u64`, `u128`, `field`). At the beginning of a new call, each memory cell is of type `uninitialized` and has value 0. The main property enforcement of this table concerns read/write consistency of every memory cell. This must ensure: @@ -155,40 +157,52 @@ ZK circuit proof systems generally define some mechanism for "public inputs" for ### AVM public inputs structure The VM circuit's I/O (`AvmPublicInputs`) is defined below: + ``` -AvmSideEffects { - newNoteHashes, - newNullifiers, - newL2ToL1Messages, - unencryptedLogs, +AvmSessionInputs { + // Initializes Execution Environment + origin: AztecAddress, + feePerL1Gas: field, + feePerL2Gas: field, + feePerDaGas: field, + globals: PublicGlobalVariables, + address: AztecAddress, + storageAddress: AztecAddress, + sender: AztecAddress, + portal: AztecAddress, + contractCallDepth: field, + isStaticCall: boolean, + isDelegateCall: boolean, + // Initializes Machine State + l1GasLeft: field, + l2GasLeft: field, + daGasLeft: field, } -AvmPublicInputs { - initialEnvironment: ExecutionEnvironment & {l1GasLeft, l2GasLeft, daGasLeft}, - calldata: [], - sideEffects: AvmSideEffects, - storageAccesses, - gasResults: {l1GasLeft, l2GasLeft, daGasLeft}, +AvmSessionResults { + l1GasLeft: field, + l2GasLeft: field, + daGasLeft: field, + reverted: boolean, +} +AvmSessionPublicInputs { + sessionInputs: AvmSessionInputs, + calldata: [field; MAX_CALLDATA_LENGTH], + worldStateAccessTrace: WorldStateAccessTrace, + accruedSubstate: AccruedSubstate, + sessionResults: AvmSessionResults, } ``` -> The `ExecutionEnvironment` structure is defined in [the AVM's high level specification](./avm.md). +> The `ExecutionEnvironment` structure is defined in [the AVM's high level specification](./avm.md). `initialEnvironment` here omits `calldata` and `bytecode`. + +> The `WorldStateAccessTrace` and `AccruedSubstate` types are defined in ["State"](./state). Their vectors are assigned constant/maximum lengths when used as circuit inputs. ### AVM public input columns The `AvmPublicInputs` structure is represented in the VM trace via the following public input columns: -1. `initialEnvironment` has a dedicated column and is used to initialize the initial call's `AvmContext.ExecutionEnvironment` and `AvmContext.MachineState` -1. `calldata` has its own dedicated public input column -1. `sideEffects: AvmSideEffects` - - This represents the final `AccruedSubstate` of the initial contract call - - There is a separate sub-table (columns) for each side-effect vector - - Each row in the `newNoteHashes` sub-table contains `{contractAddress, noteHash}` - - Each row in the `newNullifiers` sub-table contains `{contractAddress, nullifier}` - - Each row in the `newL2ToL1Messages` sub-table contains `{contractAddress, wordIndex, messageWord}` - - where a message containing N words takes up N entries with increasing `wordIndex` - - Each row in the `unencryptedLogs` sub-table contains `{contractAddress, wordIndex, logWord}` - - where a log containing N words takes up N entries with increasing `wordIndex` - - Side effects are present in the trace in execution-order -1. `storageAccesses` - - This contains the first and last public storage access for each slot that is accessed during execution - - Each row in the `storageAccesses` sub-table contains `{contractAddress, slot, value}` - - Storage accesses are present in the trace in execution-order -1. `gasResults: AvmGasResults` - - This is derived from the _final_ `AvmContext.MachineState` of the initial contract call +1. `sessionInputs` has a dedicated column and is used to initialize the initial call's `AvmContext.ExecutionEnvironment` and `AvmContext.MachineState`. +1. `calldata` occupies its own public input column as it is handled differently from the rest of the `ExecutionEnvironment`. It is used to initialize the initial call's `AvmContext.ExecutionEnvironment.calldata`. + - Equivalence is enforced between this `calldata` column and the "input call pointer"'s memory. Through this mechanism, the initial call's `calldata` is placed in a region memory that can be referenced via the `CALLDATACOPY` instruction from within the initial call. +1. `worldStateAccessTrace` is a trace of all world state accesses. Each of its component vectors has a dedicated set of public input columns (a sub-table). An instruction that reads or writes world state must match a trace entry. The [trace type definition in the "State" section] lists, for each trace vector, the instruction that populate its entries. +1. `accruedSubstate` contains the final `AccruedSubstate`. + - This includes the accrued substate of all _unreverted_ sub-contexts. + - Reverted substate is not present in the Circuit I/O as it does not require further validation/processing by downstream circuits. +1. `sessionResults` has a dedicated column and represents the core "results" of the AVM session processed by this circuit (remaining gas, reverted). diff --git a/yellow-paper/docs/public-vm/avm.md b/yellow-paper/docs/public-vm/avm.md index ff478785279..6a955db92db 100644 --- a/yellow-paper/docs/public-vm/avm.md +++ b/yellow-paper/docs/public-vm/avm.md @@ -12,7 +12,7 @@ An Aztec transaction may include one or more **public execution requests**. A pu In order to execute public contract bytecode, the AVM requires some context. An [**execution context**](#execution-context) contains all information necessary to initiate AVM execution, including the relevant contract's bytecode and all state maintained by the AVM. A **contract call** initializes an execution context and triggers AVM execution within that context. -Instruction-by-instruction, the AVM [executes](#execution) the bytecode specified in its context. An **instruction** is a bytecode entry that, when executed, modifies the AVM's execution context according to the instruction's definition in the ["AVM Instruction Set"](./instruction-set). Execution within a context ends when the AVM encounters a [**halt**](#halting). +Instruction-by-instruction, the AVM [executes](#execution) the bytecode specified in its context. An **instruction** is a bytecode entry that, when executed, modifies the AVM's execution context (in particular its [state](./state)) according to the instruction's definition in the ["AVM Instruction Set"](./instruction-set). Execution within a context ends when the AVM encounters a [**halt**](#halting). During execution, additional contract calls may be made. While an [**initial contract call**](#initial-contract-calls) initializes a new execution context directly from a public execution request, a [**nested contract call**](#nested-contract-calls) occurs _during_ AVM execution and is triggered by a **contract call instruction** ([`CALL`](./instruction-set#isa-section-call), [`STATICCALL`](./instruction-set#isa-section-call), or `DELEGATECALL`). It initializes a new execution context (**nested context**) from the current one (**calling context**) and triggers execution within it. When nested call's execution completes, execution proceeds in the calling context. @@ -21,7 +21,7 @@ A **caller** is a contract call's initiator. The caller of an initial contract c ## Outline - [**Public contract bytecode**](#public-contract-bytecode) (aka AVM bytecode) -- [**Execution context**](#execution-context), outlining the AVM's environment and state +- [**Execution context**](#execution-context), outlining the AVM's execution context - [**Execution**](#execution), outlining control flow, gas tracking, normal halting, and exceptional halting - [**Initial contract calls**](#initial-contract-calls), outlining the initiation of a contract call from a public execution request - [**Nested contract calls**](#nested-contract-calls), outlining the initiation of a contract call from an instruction as well as the processing of nested execution results, gas refunds, and state reverts @@ -29,8 +29,9 @@ A **caller** is a contract call's initiator. The caller of an initial contract c > This document is meant to provide a high-level definition of the Aztec Virtual Machine as opposed to a specification of its SNARK implementation. The document therefore mostly omits SNARK or circuit-centric verbiage except when particularly relevant to high-level design decisions. This document is supplemented by the following resources: +- **[AVM State](./state.md)** - **[AVM Instruction Set](./instruction-set)** -- **[AVM Memory Model](./state-model.md)** +- **[AVM Memory Model](./memory-model.md)** - **[AVM Circuit](./avm-circuit.md)** ## Public contract bytecode @@ -49,128 +50,51 @@ Many terms and definitions here are borrowed from the [Ethereum Yellow Paper](ht Initialized by a contract call, an **execution context** includes the information necessary to initiate AVM execution along with all state maintained by the AVM throughout execution: -``` -AvmContext { - environment: ExecutionEnvironment, - machineState: MachineState, - worldState: WorldState, - journal: Journal, - accruedSubstate: AccruedSubstate, - results: ContractCallResults, -} -``` +#### _AvmContext_ +| Field | Type | +| --- | --- | +| environment | `ExecutionEnvironment` | +| [machineState](./state#machine-state) | `MachineState` | +| [worldState](./state#avm-world-state) | `AvmWorldState` | +| [worldStateAccessTrace](./state#world-state-access-trace) | `WorldStateAccessTrace` | +| [accruedSubstate](./state#accrued-substate) | `AccruedSubstate` | +| results | `ContractCallResults` | ### Execution Environment A context's **execution environment** remains constant throughout the context's execution. When a contract call initializes its execution context, it fully specifies the execution environment. -``` -ExecutionEnvironment { - address: AztecAddress, - storageAddress: AztecAddress, - origin: AztecAddress, - sender: AztecAddress, - portal: EthAddress, - feePerL1Gas: field, - feePerL2Gas: field, - feePerDaGas: field, - contractCallDepth: field, - globals: PublicGlobalVariables, - isStaticCall: boolean, - isDelegateCall: boolean, - calldata: [field; ], - bytecode: [field; ], -} -``` - -### Machine State - -A context's **machine state** is transformed on an instruction-per-instruction basis. When a contract call initializes its execution context, it specifies the first few fields (`*GasLeft`) of the machine state and initializes the remaining members as follows: - -``` -MachineState { - l1GasLeft: field, - l2GasLeft: field, - daGasLeft: field, - pc: field = 0, - internalCallStack: Vector = [], // initialized as empty - memory: [field; 2^32] = [0, ..., 0], // all 2^32 entries are initialized to zero -} -``` - -The machine state's entries are defined as follows: -- `l1GasLeft`: how much L1 gas remains -- `l2GasLeft`: how much L2 gas remains -- `daGasLeft`: how much DA (data availability) gas remains -- `pc` (program counter): index to the current instruction being executed in the context's bytecode -- `internalCallStack`: a stack of program counters pushed to and popped from by `INTERNALCALL` and `INTERNALRETURN` instructions -- `memory`: a $2^{32}$ entry memory space accessible by user code (bytecode instructions) and initialized to all zeros upon context initialization - -### World State - -The AVM has access to a subset of [Aztec's persistent global state](../state), and an AVM execution context exposes a limited interface to that state. In particular, while much of Aztec's state is implemented as readable/writeable merkle trees, these structures are exposed in the AVM as simple mappings or vectors with access-limitations. - -An execution context's **world state** is its interface to Aztec's global state. When an [_initial_ contract call](#initial-contract-calls) is made, its context is initialized with a snapshot of Aztec's latest global state. When a [_nested_ contract call](#nested-contract-calls) is made, its context is initialized with a snapshot of the calling context's current world state. - -When a context's execution [halts](#halting), the caller accepts or rejects its world state modifications. If execution [returned](./instruction-set#isa-section-return) without reverting, the caller accepts its world state modifications and updates its world state accordingly. If execution reverted ([normally](./instruction-set#isa-section-revert) or [exceptionally](#exceptional-halting)), the caller rejects its world state modifications. - -A context's world state interface is defined as follows: -``` -WorldState { - contracts: AztecAddress => {bytecode, portalAddress}, // read-only from within AVM - blockHeaders: Vector
, // read-only from within AVM - publicStorage: (AztecAddress, field) => value, // read/write - l1ToL2Messages: field => message, // read-only from within AVM - l2ToL1Messages: Vector<[field; ]>, // append-only (no reads) from within AVM - noteHashes: Vector, // append-only (no reads) from within AVM - nullifiers: Vector, // append-only (no reads) from within AVM -} -``` -> The notation `key => value` describes a mapping from `key` to `value`. - -> Read-only or append-only structures in the world state may have more open access-limitations outside of the AVM, but the AVM's interface imposes the above-listed access-limitations. As an example, private execution can deploy new contracts, but contract deployments are not supported by the AVM. Thus `contracts` is read-only here. - - -### Journal - -**Journal** tracks all world state accesses (reads and writes) that have taken place thus far during a contract call's execution. Unlike world state, a context's journal is accepted by its caller regardless of whether execution reverts. -``` -Journal { - nestedCalls: Vector<(AztecAddress, boolean)>, - blockHeaderReads: Vector<(field, Header)>, - publicStorageAccesses: Vector, - l1ToL2MessageReads: Vector<(L1toL2MessageContext, [field; ])>, - newL2ToL1Messages: Vector<(L2toL1MessageContext, [field; ])>, - newNoteHashes: Vector, - newNullifiers: Vector, -} -``` -> The types tracked in the journal vectors are listed in Aztec's [private function types specification](../circuits/private-function#types) and Aztec's [public kernel types specification](../circuits/public-kernel-tail#types). - -> This journal structure is important for imposing limitations on the maximum number of allowable world state accesses, and for communicating the list of state accesses to the public kernel circuit. - -### Accrued Substate -The **accrued substate**, as coined in the [Ethereum Yellow Paper](https://ethereum.github.io/yellowpaper/paper), contains information that is accrued throughout transaction execution to be "acted upon immediately following the transaction." These are append-only arrays containing state that is not relevant to other calls. Similar to world state, if a contract call returns normally, its substate is appended to its calling context, but if it reverts its substate is rejected by its caller. - -``` -AccruedSubstate { - unencryptedLogs: Vector<[field; ]>, -} -``` +#### _ExecutionEnvironment_ +| Field | Type | Description | +| --- | --- | --- | +| origin | `AztecAddress` | | +| feePerL1Gas | `field` | | +| feePerL2Gas | `field` | | +| feePerDaGas | `field` | | +| globals | `PublicGlobalVariables` | | +| address | `AztecAddress` | | +| storageAddress | `AztecAddress` | | +| sender | `AztecAddress` | | +| portal | `AztecAddress` | | +| contractCallDepth | `field` | | +| isStaticCall | `boolean` | | +| isDelegateCall | `boolean` | | +| calldata | `[field; ]` | | +| bytecode | `[field; ]` | | ### Contract Call Results + Finally, when a contract call halts, it sets the context's **contract call results** to communicate results to the caller. -``` -ContractCallResults { - reverted: boolean, - output: [field; ], -} -``` +#### _ContractCallResults_ +| Field | Type | Description | +| --- | --- | --- | +| `reverted` | `boolean` | | +| `output` | `[field; ]` | | ## Execution -Once an execution context has been initialized for a contract call, the machine state's program counter determines which instruction the AVM executes. For any contract call, the program counter starts at zero, and so instruction execution begins with the very first entry in a contract's bytecode. +Once an execution context has been initialized for a contract call, the [machine state's](./state#machine-state) program counter determines which instruction the AVM executes. For any contract call, the program counter starts at zero, and so instruction execution begins with the very first entry in a contract's bytecode. ### Program Counter and Control Flow @@ -192,7 +116,7 @@ assert machineState.l2GasLeft - instr.l2GasCost > 0 assert machineState.daGasLeft - instr.daGasCost > 0 ``` -> Many instructions (like arithmetic operations) have 0 `l1GasCost` and `daGasCost`. Instructions only incur an L1 or DA cost if they modify the world state, journal, or accrued substate. +> Many instructions (like arithmetic operations) have 0 `l1GasCost` and `daGasCost`. Instructions only incur an L1 or DA cost if they modify the [world state](./state#avm-world-state) or [accrued substate](./state#accrued-substate). If these assertions pass, the machine state's gas left is decreased prior to the instruction's core execution: @@ -297,9 +221,9 @@ The AVM's exceptional halting conditions area listed below: ``` 1. **Maximum contract call calls per execution request (1024) exceeded** ``` - assert journal.nestedCalls.length <= 1024 + assert worldStateAccessTrace.contractCalls.length <= 1024 assert environment.bytecode[machineState.pc].opcode not in {CALL, STATICCALL, DELEGATECALL} - OR journal.nestedCalls.length < 1024 + OR worldStateAccessTrace.contractCalls.length < 1024 ``` 1. **Maximum internal call depth (1024) exceeded** ``` @@ -349,7 +273,7 @@ context = AvmContext { environment = INITIAL_EXECUTION_ENVIRONMENT, machineState = INITIAL_MACHINE_STATE, worldState = , - journal = INITIAL_JOURNAL, + worldStateAccessTrace = { [], [], ... [] } // all trace vectors empty, accruedSubstate = INITIAL_ACCRUED_SUBSTATE, results = INITIAL_CONTRACT_CALL_RESULTS, } @@ -386,16 +310,6 @@ INITIAL_MACHINE_STATE = MachineState { memory = [0, ..., 0], // all 2^32 entries are initialized to zero } -INITIAL_JOURNAL = Journal { - nestedCalls = [], // initialized as empty - blockHeaderReads = [], // initialized as empty - publicStorageAccesses = [], // initialized as empty - l1ToL2MessageReads = [], // initialized as empty - newL2ToL1Messages = [], // initialized as empty - newNoteHashes = [], // initialized as empty - newNullifiers = [], // initialized as empty -} - INITIAL_ACCRUED_SUBSTATE = AccruedSubstate { unencryptedLogs = [], // initialized as empty } @@ -419,7 +333,7 @@ nestedContext = AvmContext { environment: nestedExecutionEnvironment, // defined below machineState: nestedMachineState, // defined below worldState: callingContext.worldState, - journal: callingContext.journal, + worldStateAccessTrace: callingContext.worldStateAccessTrace, accruedSubstate: INITIAL_ACCRUED_SUBSTATE, results: INITIAL_CONTRACT_CALL_RESULTS, } @@ -508,10 +422,10 @@ if !nestedContext.results.reverted AND instr.opcode != STATICCALL_OP: context.accruedSubstate.append(nestedContext.accruedSubstate) ``` -Regardless of whether a nested context has reverted, its journal updates are absorbed into the calling context along with a new `nestedCalls` entry. +Regardless of whether a nested context has reverted, its [world state access trace](./state#world-state-access-trace) updates are absorbed into the calling context along with a new `contractCalls` entry. ``` -context.journal = nestedContext.journal -context.journal.append(nestedContext.address, nestedContext.machineState.reverted) +context.worldStateAccessTrace = nestedContext.worldStateAccessTrace +context.worldStateAccessTrace.contractCalls.append({nestedContext.address, clk}) ``` -> Reminder: a nested call cannot make updates to the world state, journal, or accrued substate if it is a [`STATICCALL`](./instruction-set/#isa-section-staticcall). +> Reminder: a nested call cannot make updates to the world state or accrued substate if it is a [`STATICCALL`](./instruction-set/#isa-section-staticcall). diff --git a/yellow-paper/docs/public-vm/gen/_InstructionSet.mdx b/yellow-paper/docs/public-vm/gen/_InstructionSet.mdx index d11198262d2..051b43bcf29 100644 --- a/yellow-paper/docs/public-vm/gen/_InstructionSet.mdx +++ b/yellow-paper/docs/public-vm/gen/_InstructionSet.mdx @@ -471,7 +471,7 @@ Addition (a + b) - **Category**: Compute - Arithmetic - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -492,7 +492,7 @@ Subtraction (a - b) - **Category**: Compute - Arithmetic - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -513,7 +513,7 @@ Multiplication (a * b) - **Category**: Compute - Arithmetic - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -534,7 +534,7 @@ Unsigned division (a / b) - **Category**: Compute - Arithmetic - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -555,7 +555,7 @@ Equality check (a == b) - **Category**: Compute - Comparators - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -576,7 +576,7 @@ Less-than check (a < b) - **Category**: Compute - Comparators - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -597,7 +597,7 @@ Less-than-or-equals check (a <= b) - **Category**: Compute - Comparators - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -618,7 +618,7 @@ Bitwise AND (a & b) - **Category**: Compute - Bitwise - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -639,7 +639,7 @@ Bitwise OR (a | b) - **Category**: Compute - Bitwise - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -660,7 +660,7 @@ Bitwise XOR (a ^ b) - **Category**: Compute - Bitwise - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -681,7 +681,7 @@ Bitwise NOT (inversion) - **Category**: Compute - Bitwise - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's input - **dstOffset**: memory offset specifying where to store operation's result @@ -701,7 +701,7 @@ Bitwise leftward shift (a << b) - **Category**: Compute - Bitwise - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -722,7 +722,7 @@ Bitwise rightward shift (a >> b) - **Category**: Compute - Bitwise - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. + - **inTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for this instruction. - **Args**: - **aOffset**: memory offset of the operation's left input - **bOffset**: memory offset of the operation's right input @@ -743,12 +743,12 @@ Type cast - **Category**: Type Conversions - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **dstTag**: The [tag/size](./state-model#tags-and-tagged-memory) to tag the destination with but not to check inputs against. + - **dstTag**: The [tag/size](./memory-model#tags-and-tagged-memory) to tag the destination with but not to check inputs against. - **Args**: - **aOffset**: memory offset of word to cast - **dstOffset**: memory offset specifying where to store operation's result - **Expression**: `M[dstOffset] = cast(M[aOffset])` -- **Details**: Cast a word in memory based on the `dstTag` specified in the bytecode. Truncates (`M[dstOffset] = M[aOffset] mod 2^dstsize`) when casting to a smaller type, left-zero-pads when casting to a larger type. See [here](./state-model#cast-and-tag-conversions) for more details. +- **Details**: Cast a word in memory based on the `dstTag` specified in the bytecode. Truncates (`M[dstOffset] = M[aOffset] mod 2^dstsize`) when casting to a smaller type, left-zero-pads when casting to a larger type. See [here](./memory-model#cast-and-tag-conversions) for more details. - **Tag updates**: `T[dstOffset] = dstTag` - **Bit-size**: 96 @@ -1188,7 +1188,7 @@ Set a memory word from a constant in the bytecode - **Category**: Machine State - Memory - **Flags**: - **indirect**: Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`. - - **inTag**: The [type/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for SET. + - **inTag**: The [type/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for SET. - **Args**: - **const**: an N-bit constant value from the bytecode to store in memory (any type except `field`) - **dstOffset**: memory offset specifying where to store the constant diff --git a/yellow-paper/docs/public-vm/state-model.md b/yellow-paper/docs/public-vm/memory-model.md similarity index 100% rename from yellow-paper/docs/public-vm/state-model.md rename to yellow-paper/docs/public-vm/memory-model.md diff --git a/yellow-paper/docs/public-vm/state.md b/yellow-paper/docs/public-vm/state.md new file mode 100644 index 00000000000..005f90efa67 --- /dev/null +++ b/yellow-paper/docs/public-vm/state.md @@ -0,0 +1,95 @@ +# State + +This section describes the types of state maintained by the AVM. + +## Machine State + +**Machine state** is transformed on an instruction-per-instruction basis. Each execution context has its own machine state. + +### _MachineState_ + +| Field | Type | Description | +| --- | --- | --- | +| `l1GasLeft` | `field` | Tracks the amount of L1 gas remaining at any point during execution. | +| `l2GasLeft` | `field` | Tracks the amount of L2 gas remaining at any point during execution. | +| `daGasLeft` | `field` | Tracks the amount of DA gas remaining at any point during execution. | +| `pc` | `field` | Index into the contract's bytecode indicating which instruction to execute. Initialized\* to 0. | +| `internalCallStack` | `Vector` | A stack of program counters pushed to and popped from by `INTERNALCALL` and `INTERNALRETURN` instructions. Initialized\* as empty. | +| `memory` | `[field; 2^32]` | A $2^{32}$ entry memory space accessible by user code (bytecode instructions). All 2^32 entries are initialized\* to 0. See ["Memory Model"](./memory-model) for a complete description of AVM memory. | + +## World State + +### AVM's access to Aztec State +[Aztec's global state](../state) is implemented as a few merkle trees that are exposed to the AVM as follows: + +| State | Tree | Tree Type | AVM Access | +| --- | --- | --- | --- | +| Note Hashes | Note Hash Tree | Append-only merkle tree | membership-checks (start-of-block), appends | +| Nullifiers | Nullifier Tree | Indexed merkle tree | membership-checks (latest), appends | +| L1-to-L2 Messages | L1-to-L2 Message Tree | Append-only (or indexed?) merkle tree | membership-checks (start-of-block), leaf-preimage-reads | +| Public Storage | Public Data Tree | Updatable merkle tree | membership-checks (latest), reads, writes | +| Headers | Archive Tree | Append-only merkle tree | membership-checks, leaf-preimage-reads | + +> As described in ["Contract Deployment"](../contract-deployment), contracts are not stored in a dedicated tree. A [contract class](../contract-deployment/classes) is [represented](../contract-deployment/classes#registration) as an unencrypted log containing the `ContractClass` structure (which contains the bytecode) and a nullifier representing the class identifier. The `contractClasses` interface provides access to contract classes indexed by class identifier. A [contract instance](../contract-deployment/instances) is [represented](../contract-deployment/classes#registration) as an unencrypted log containing the `ContractInstance` structure and a nullifier representing the contract address. The `contractInstances` interface provides access to contract instances indexed by contract address. + +### AVM World State + +The AVM does not directly modify Aztec state. Instead, it stages modifications to be applied later pending successful execution. As part of each execution context, the AVM maintains **world state** which is a representation of Aztec state that includes _staged_ modifications. + +As the AVM executes contract code, instructions may read or modify the world state within the current context. If execution within a particular context reverts, staged world state modifications are rejected by the caller. If execution succeeds, staged world state modifications are accepted. + +#### _AvmWorldState_ + +The following table defines an AVM context's world state interface: + +| Field | AVM Instructions & Access | +| --- | --- | +| `noteHashes` | `NOTEHASHEXISTS` (membership-checks (start-of-block)), `EMITNULLIFIER` (appends) | +| `nullifiers` | `NULLIFIERSEXISTS` membership-checks (latest), `EMITNULLIFIER` (appends) | +| `l1ToL2Messages` | `READL1TOL2MSG` (membership-checks (start-of-block) & leaf-preimage-reads) | +| `publicStorage` | `SLOAD` (membership-checks (latest) & reads), `SSTORE` (writes) | +| `headers` | `HEADERMEMBER` (membership-checks & leaf-preimage-reads) | + +### World State Access Trace + +**The circuit implementation of the AVM does _not_ prove that its world state accesses are valid and properly sequenced**, and does not perform actual tree updates. Thus, _all_ world state accesses, **regardless of whether they are rejected due to a revert**, must be traced and eventually handed off to downstream circuits (public kernel and rollup circuits) for comprehensive validation and tree updates. + +This trace of an AVM session's contract calls and world state accesses is named the **world state access trace**. + +> The world state access trace is also important for enforcing limitations on the maximum number of allowable world state accesses. + +#### _WorldStateAccessTrace_ + +Each entry in the world state access trace is listed below along with its type and the instructions that append to it: + +| Trace | Relevant State | Trace Vector Type | Instructions | +| --- | --- | --- | --- | +| `publicStorageReads` | Public Storage | `Vector` | `SLOAD` | +| `publicStorageWrites` | Public Storage | `Vector` | `SSTORE` | +| `l1ToL2MessageReads` | L1-To-L2 Messages | `Vector` | `READL1TOL2MSG` | +| `noteHashChecks` | Note Hashes | `Vector` | `NOTEHASHEXISTS` | +| `newNoteHashes` | Note Hashes | `Vector` | `EMITNOTEHASH` | +| `nullifierChecks` | Nullifiers | `Vector` | `NULLIFIEREXISTS` | +| `newNullifiers` | Nullifiers | `Vector` | `EMITNULLIFIER` | +| `archiveChecks` | Headers | `Vector` | `HEADERMEMBER` | +| `contractCalls` | - | `Vector` | `*CALL` | + +> The types tracked in these trace vectors are defined [here](./type-structs). + +> The syntax `*CALL` is short for `CALL`/`STATICCALL`/`DELEGATECALL`. + +> Aztec tree operations like membership checks, appends, or leaf updates are performed in-circuit by downstream circuits (public kernel and rollup circuits), _after_ AVM execution. The world state access trace is a list of requests made by the AVM for later circuits to perform. + +## Accrued Substate + +> The term "accrued substate" is borrowed from the [Ethereum Yellow Paper](https://ethereum.github.io/yellowpaper/paper). + +**Accrued substate** is accrued throughout a context's execution, but updates to it are strictly never relevant to subsequent instructions, contract calls, or transactions. An execution context is always initialized with empty accrued substate, and instructions can append to its vectors which are _append-only_. If a contract call's execution succeeds, its accrued substate is appended to the caller's. If a contract's execution reverts, its accrued substate is ignored. There is no accrued substate "trace" that includes entries from reverted contexts. + +#### _AccruedSubstate_ +| Field | Type | Instructions | Description | +| --- | --- | --- | --- | +| `unencryptedLogs` | `Vector` | `ULOG` | | +| `sentL2ToL1Messages` | `Vector` | `SENDL1TOL2MSG` | | + +> The types tracked in these vectors are defined [here](./type-structs). diff --git a/yellow-paper/docs/public-vm/type-structs.md b/yellow-paper/docs/public-vm/type-structs.md new file mode 100644 index 00000000000..948497c70c7 --- /dev/null +++ b/yellow-paper/docs/public-vm/type-structs.md @@ -0,0 +1,94 @@ +# Type Definitions + +This section lists type definitions relevant to AVM State and Circuit I/O. + +#### _TracedContractCall_ + +| Field | Type | Description | +| --- | --- | --- | +| `address` | `field` | The called contract address. | +| `storageAddress` | `field` | The storage contract address (different from `address` for delegate calls). | +| `endLifetime` | `field` | End lifetime of a call. Final `clk` for reverted calls, `endLifetime` of parent for successful calls. Successful initial/top-level calls have infinite (max-value) `endLifetime`. | + +#### _TracedL1ToL2MessageRead_ + +| Field | Type | Description | +| --- | --- | --- | +| `callPointer` | `field` | Associates this item with a `TracedContractCall` entry in `worldStateAccessTrace.contractCalls` | +| `portal` | `EthAddress` | | +| `msgKey` | `field` | | +| `message` | `[field; MAX_L1_TO_L2_MESSAGE_LENGTH]` | **Omitted from public inputs** | +| `endLifetime` | `field` | Equivalent to `endLifetime` of the containing contract call. | + +#### _TracedStorageAccess_ + +| Field | Type | Description | +| --- | --- | --- | +| `callPointer` | `field` | Associates this item with a `TracedContractCall` entry in `worldStateAccessTrace.contractCalls`| +| `slot` | `field` | | +| `value` | `field` | | +| `counter` | `field` | | +| `endLifetime` | `field` | Equivalent to `endLifetime` of the containing contract call. The last `counter` at which this read/write should be considered to "exist" if this call or a parent reverted. | + +#### _TracedNoteHash_ + +| Field | Type | Description | +| --- | --- | --- | +| `callPointer` | `field` | Associates this item with a `TracedContractCall` entry in `worldStateAccessTrace.contractCalls` | +| `value` | `field` | | +| `counter` | `field` | | +| `endLifetime` | `field` | Equivalent to `endLifetime` of the containing contract call. The last `counter` at which this object should be considered to "exist" if this call or a parent reverted. | + +> Note: `value` here is not siloed by contract address nor is it made unique with a nonce. Note hashes are siloed and made unique by the public kernel. + +#### _TracedNullifier_ + +| Field | Type | Description | +| --- | --- | --- | +| `callPointer` | `field` | Associates this item with a `TracedContractCall` entry in `worldStateAccessTrace.contractCalls` | +| `value` | `field` | | +| `counter` | `field` | | +| `endLifetime` | `field` | Equivalent to `endLifetime` of the containing contract call. The last `counter` at which this object should be considered to "exist" if this call or a parent reverted. | + +#### _TracedIndexedLeafCheck_ + +| Field | Type | Description | +| --- | --- | --- | +| `callPointer` | `field` | Associates this item with a `TracedContractCall` entry in `worldStateAccessTrace.contractCalls` | +| `leaf` | `field` | | +| `exists` | `field` | | +| `counter` | `field` | | +| `endLifetime` | `field` | Equivalent to `endLifetime` of the containing contract call. | + +#### _TracedLeafCheck_ + +| Field | Type | Description | +| --- | --- | --- | +| `callPointer` | `field` | Associates this item with a `TracedContractCall` entry in `worldStateAccessTrace.contractCalls` | +| `leaf` | `field` | | +| `leafIndex` | `field` | | +| `exists` | `field` | | +| `counter` | `field` | | +| `endLifetime` | `field` | Equivalent to `endLifetime` of the containing contract call. | + +#### _TracedArchiveLeafCheck_ + +| Field | Type | Description | +| --- | --- | --- | +| `leafIndex` | `field` | | +| `leaf` | `field` | | + +#### _UnencryptedLog_ + +| Field | Type | Description | +| --- | --- | --- | +| `address` | `AztecAddress` | Contract address that emitted the log. | +| `log` | `[field; MAX_UNENCRYPTED_LOG_LENGTH]` | | + +#### _SentL2ToL1Message_ + +| Field | Type | Description | +| --- | --- | --- | +| `address` | `AztecAddress` | Contract address that emitted the message. | +| `portal` | `EthAddress` | L1 portal address to send the message to. | +| `message` | `[field, MAX_L2_TO_L1_MESSAGE_LENGTH]` | | diff --git a/yellow-paper/sidebars.js b/yellow-paper/sidebars.js index bfdc9cc4d8a..4d3ae29ae54 100644 --- a/yellow-paper/sidebars.js +++ b/yellow-paper/sidebars.js @@ -190,7 +190,9 @@ const sidebars = { link: { type: "doc", id: "public-vm/avm" }, items: [ "public-vm/avm", - "public-vm/state-model", + "public-vm/state", + "public-vm/type-structs", + "public-vm/memory-model", "public-vm/instruction-set", "public-vm/avm-circuit", "public-vm/control-flow", diff --git a/yellow-paper/src/preprocess/InstructionSet/InstructionSet.js b/yellow-paper/src/preprocess/InstructionSet/InstructionSet.js index ae297566cb4..8ccc09061b1 100644 --- a/yellow-paper/src/preprocess/InstructionSet/InstructionSet.js +++ b/yellow-paper/src/preprocess/InstructionSet/InstructionSet.js @@ -7,9 +7,9 @@ const TOPICS_IN_SECTIONS = [ "Name", "Summary", "Category", "Flags", "Args", "Expression", "Details", "Tag checks", "Tag updates", "Bit-size", ]; -const IN_TAG_DESCRIPTION = "The [tag/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with."; +const IN_TAG_DESCRIPTION = "The [tag/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with."; const IN_TAG_DESCRIPTION_NO_FIELD = IN_TAG_DESCRIPTION + " `field` type is NOT supported for this instruction."; -const DST_TAG_DESCRIPTION = "The [tag/size](./state-model#tags-and-tagged-memory) to tag the destination with but not to check inputs against."; +const DST_TAG_DESCRIPTION = "The [tag/size](./memory-model#tags-and-tagged-memory) to tag the destination with but not to check inputs against."; const INDIRECT_FLAG_DESCRIPTION = "Toggles whether each memory-offset argument is an indirect offset. Rightmost bit corresponds to 0th offset arg, etc. Indirect offsets result in memory accesses like `M[M[offset]]` instead of the more standard `M[offset]`."; const INSTRUCTION_SET_RAW = [ @@ -273,7 +273,7 @@ const INSTRUCTION_SET_RAW = [ ], "Expression": "`M[dstOffset] = cast(M[aOffset])`", "Summary": "Type cast", - "Details": "Cast a word in memory based on the `dstTag` specified in the bytecode. Truncates (`M[dstOffset] = M[aOffset] mod 2^dstsize`) when casting to a smaller type, left-zero-pads when casting to a larger type. See [here](./state-model#cast-and-tag-conversions) for more details.", + "Details": "Cast a word in memory based on the `dstTag` specified in the bytecode. Truncates (`M[dstOffset] = M[aOffset] mod 2^dstsize`) when casting to a smaller type, left-zero-pads when casting to a larger type. See [here](./memory-model#cast-and-tag-conversions) for more details.", "Tag checks": "", "Tag updates": "`T[dstOffset] = dstTag`", }, @@ -681,7 +681,7 @@ context.machineState.pc = loc "Category": "Machine State - Memory", "Flags": [ {"name": "indirect", "description": INDIRECT_FLAG_DESCRIPTION}, - {"name": "inTag", "description": "The [type/size](./state-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for SET."}, + {"name": "inTag", "description": "The [type/size](./memory-model#tags-and-tagged-memory) to check inputs against and tag the destination with. `field` type is NOT supported for SET."}, ], "Args": [ {"name": "const", "description": "an N-bit constant value from the bytecode to store in memory (any type except `field`)", "mode": "immediate"},