Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimizer fixes and guardrails #2914

Merged
merged 35 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
d9301dd
change formatting for asm subcodes
charles-cooper Jun 15, 2022
dfd711b
add eval_once_check
charles-cooper Jun 14, 2022
e624216
enforce exactly-once semantics for unique_symbol
charles-cooper Jun 17, 2022
5851c40
refactor optimizer a bit, add a couple rules
charles-cooper Jun 15, 2022
d5d4fe7
clarify comparison ops
charles-cooper Jun 15, 2022
65411dd
add more tests, comments
charles-cooper Jun 16, 2022
fdcb403
reorder some code
charles-cooper Jun 16, 2022
62c175c
add almost_never cases
charles-cooper Jun 16, 2022
9f43aa4
fix lint
charles-cooper Jun 18, 2022
9b3f747
generalize asm optimizer rule for iszero
charles-cooper Jun 18, 2022
18ca1d1
fix optimizer rule
charles-cooper Jun 18, 2022
6da84f1
add more tests for boundary rules
charles-cooper Jun 18, 2022
abec47c
clarify a condition
charles-cooper Jun 18, 2022
ef770a0
improve coverage
charles-cooper Jun 18, 2022
738403a
improve coverage more
charles-cooper Jun 18, 2022
203d1a3
fix lint
charles-cooper Jun 18, 2022
b857765
add tests for ceil32
charles-cooper Jun 18, 2022
4921792
minor fixes
charles-cooper Jun 18, 2022
2988c35
small fix
charles-cooper Jun 18, 2022
aa59a07
Merge branch 'master' into optimizer_fixes
charles-cooper Jun 18, 2022
2987cb7
don't check unique symbols so strictly
charles-cooper Jun 18, 2022
c148926
add reset names, so that different runs of compile_code will be the same
charles-cooper Jun 18, 2022
c057cb6
refine unique symbols requirement
charles-cooper Jun 18, 2022
28090d2
add eval once checks to create, selfdestruct
charles-cooper Jun 18, 2022
c1c63b2
fix lint
charles-cooper Jun 18, 2022
14ad08f
fix ne optimization
charles-cooper Jun 18, 2022
bb47b0c
fix lint
charles-cooper Jun 18, 2022
3a7f3cb
clean up a branch and add some comments
charles-cooper Jun 19, 2022
0434883
add a note
charles-cooper Jun 19, 2022
55da967
make the symbol checker a bit more formal
charles-cooper Jun 19, 2022
8cf28e9
add some notes about boundary case naming
charles-cooper Jun 19, 2022
5c88810
fix unique_symbols for `deploy`
charles-cooper Jun 19, 2022
992ca75
fix lint
charles-cooper Jun 19, 2022
6a3e230
clean up optimizer control flow
charles-cooper Jun 19, 2022
4d9e8bb
fix mypy
charles-cooper Jun 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 84 additions & 10 deletions tests/compiler/ir/test_optimize_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
(["eq", 1, 2], [0]),
(["lt", 1, 2], [1]),
(["eq", "x", 0], ["iszero", "x"]),
(["ne", "x", 0], ["iszero", ["iszero", "x"]]),
(["ne", "x", 1], None),
(["iszero", ["ne", "x", 1]], ["iszero", ["iszero", ["iszero", ["xor", "x", 1]]]]),
(["eq", ["sload", 0], 0], ["iszero", ["sload", 0]]),
# branch pruner
(["if", ["eq", 1, 2], "pass"], ["seq"]),
Expand All @@ -16,31 +19,89 @@
(["seq", ["assert", ["lt", 1, 2]]], ["seq"]),
(["seq", ["assert", ["lt", 1, 2]], 2], [2]),
# condition rewriter
(["if", ["eq", "x", "y"], "pass"], ["if", ["iszero", ["sub", "x", "y"]], "pass"]),
(["if", ["eq", "x", "y"], "pass"], ["if", ["iszero", ["xor", "x", "y"]], "pass"]),
(["if", "cond", 1, 0], ["if", ["iszero", "cond"], 0, 1]),
(["assert", ["eq", "x", "y"]], ["assert", ["iszero", ["sub", "x", "y"]]]),
(["assert", ["eq", "x", "y"]], ["assert", ["iszero", ["xor", "x", "y"]]]),
# nesting
(["mstore", 0, ["eq", 1, 2]], ["mstore", 0, 0]),
# conditions
(["ge", "x", 0], [1]), # x >= 0 == True
(["ge", ["sload", 0], 0], None), # no-op
(["iszero", ["gt", "x", 2 ** 256 - 1]], [1]), # x >= MAX_UINT256 == False
(["iszero", ["sgt", "x", 2 ** 255 - 1]], [1]), # signed x >= MAX_INT256 == False
(["gt", "x", 2 ** 256 - 1], [0]), # x >= MAX_UINT256 == False
# !(x > 0) => x == 0
(["iszero", ["gt", "x", 0]], ["iszero", ["iszero", ["iszero", "x"]]]),
# !(x < MAX_UINT256) => x == MAX_UINT256
(["iszero", ["lt", "x", 2 ** 256 - 1]], ["iszero", ["iszero", ["iszero", ["not", "x"]]]]),
# !(x < MAX_INT256) => x == MAX_INT256
(
["iszero", ["slt", "x", 2 ** 255 - 1]],
["iszero", ["iszero", ["iszero", ["xor", "x", 2 ** 255 - 1]]]],
),
# !(x > MIN_INT256) => x == MIN_INT256
(
["iszero", ["sgt", "x", -(2 ** 255)]],
["iszero", ["iszero", ["iszero", ["xor", "x", -(2 ** 255)]]]],
),
(["sgt", "x", 2 ** 255 - 1], [0]), # signed x > MAX_INT256 == False
(["sge", "x", 2 ** 255 - 1], ["eq", "x", 2 ** 255 - 1]),
(["eq", -1, "x"], ["iszero", ["not", "x"]]),
(["iszero", ["eq", -1, "x"]], ["iszero", ["iszero", ["not", "x"]]]),
(["le", "x", 0], ["iszero", "x"]),
(["le", 0, "x"], [1]),
(["le", 0, ["sload", 0]], None), # no-op
(["ge", "x", 0], [1]),
# boundary conditions
(["slt", "x", -(2 ** 255)], [0]),
(["sle", "x", -(2 ** 255)], ["eq", "x", -(2 ** 255)]),
(["lt", "x", 2 ** 256 - 1], None),
(["le", "x", 2 ** 256 - 1], [1]),
(["gt", "x", 0], ["iszero", ["iszero", "x"]]),
# x < 0 => false
(["lt", "x", 0], [0]),
# 0 < x => x != 0
(["lt", 0, "x"], ["iszero", ["iszero", "x"]]),
(["gt", 5, "x"], None),
(["ge", 5, "x"], None),
# x < 1 => x == 0
(["lt", "x", 1], ["iszero", "x"]),
(["slt", "x", 1], None),
(["gt", "x", 1], None),
(["sgt", "x", 1], None),
(["gt", "x", 2 ** 256 - 2], ["iszero", ["not", "x"]]),
(["lt", "x", 2 ** 256 - 2], None),
(["slt", "x", 2 ** 256 - 2], None),
(["sgt", "x", 2 ** 256 - 2], None),
(["slt", "x", -(2 ** 255) + 1], ["eq", "x", -(2 ** 255)]),
(["sgt", "x", -(2 ** 255) + 1], None),
(["lt", "x", -(2 ** 255) + 1], None),
(["gt", "x", -(2 ** 255) + 1], None),
(["sgt", "x", 2 ** 255 - 2], ["eq", "x", 2 ** 255 - 1]),
(["slt", "x", 2 ** 255 - 2], None),
(["gt", "x", 2 ** 255 - 2], None),
(["lt", "x", 2 ** 255 - 2], None),
# 5 > x; x < 5; x <= 4
(["iszero", ["gt", 5, "x"]], ["iszero", ["le", "x", 4]]),
(["iszero", ["ge", 5, "x"]], None),
# 5 >= x; x <= 5; x < 6
(["ge", 5, "x"], ["lt", "x", 6]),
(["lt", 5, "x"], None),
(["le", 5, "x"], None),
# 5 < x; x > 5; x >= 6
(["iszero", ["lt", 5, "x"]], ["iszero", ["ge", "x", 6]]),
(["iszero", ["le", 5, "x"]], None),
# 5 <= x; x >= 5; x > 4
(["le", 5, "x"], ["gt", "x", 4]),
(["sgt", 5, "x"], None),
(["sge", 5, "x"], None),
# 5 > x; x < 5; x <= 4
(["iszero", ["sgt", 5, "x"]], ["iszero", ["sle", "x", 4]]),
(["iszero", ["sge", 5, "x"]], None),
# 5 >= x; x <= 5; x < 6
(["sge", 5, "x"], ["slt", "x", 6]),
(["slt", 5, "x"], None),
(["sle", 5, "x"], None),
(["slt", "x", -(2 ** 255)], ["slt", "x", -(2 ** 255)]), # unimplemented
# tricky conditions
# 5 < x; x > 5; x >= 6
(["iszero", ["slt", 5, "x"]], ["iszero", ["sge", "x", 6]]),
(["iszero", ["sle", 5, "x"]], None),
# 5 <= x; x >= 5; x > 4
(["sle", 5, "x"], ["sgt", "x", 4]),
# tricky constant folds
(["sgt", 2 ** 256 - 1, 0], [0]), # -1 > 0
(["gt", 2 ** 256 - 1, 0], [1]), # -1 > 0
(["gt", 2 ** 255, 0], [1]), # 0x80 > 0
Expand All @@ -54,12 +115,21 @@
(["sgt", -(2 ** 255), 2 ** 255], [0]), # 0x80 > 0x80
(["slt", 2 ** 255, -(2 ** 255)], [0]), # 0x80 < 0x80
# arithmetic
(["ceil32", "x"], None),
(["ceil32", 0], [0]),
(["ceil32", 1], [32]),
(["ceil32", 32], [32]),
(["ceil32", 33], [64]),
(["ceil32", 95], [96]),
(["ceil32", 96], [96]),
(["ceil32", 97], [128]),
(["add", "x", 0], ["x"]),
(["add", 0, "x"], ["x"]),
(["sub", "x", 0], ["x"]),
(["sub", "x", "x"], [0]),
(["sub", ["sload", 0], ["sload", 0]], None),
(["sub", ["callvalue"], ["callvalue"]], None),
(["sub", -1, ["sload", 0]], ["not", ["sload", 0]]),
(["mul", "x", 1], ["x"]),
(["div", "x", 1], ["x"]),
(["sdiv", "x", 1], ["x"]),
Expand Down Expand Up @@ -90,6 +160,9 @@
(["exp", 1, "x"], [1]),
(["exp", 0, "x"], ["iszero", "x"]),
# bitwise ops
(["xor", "x", 2 ** 256 - 1], ["not", "x"]),
(["and", "x", 2 ** 256 - 1], ["x"]),
(["or", "x", 2 ** 256 - 1], [2 ** 256 - 1]),
(["shr", 0, "x"], ["x"]),
(["sar", 0, "x"], ["x"]),
(["shl", 0, "x"], ["x"]),
Expand Down Expand Up @@ -137,6 +210,7 @@ def test_ir_optimizer(ir):
else:
expected = IRnode.from_list(ir[1])
expected.repr_show_gas = True
optimized.annotation = None
assert optimized == expected


Expand Down
10 changes: 8 additions & 2 deletions vyper/builtin_functions/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from vyper.codegen.core import (
STORE,
IRnode,
_freshname,
add_ofst,
bytes_data_ptr,
calculate_type_for_external_return,
Expand All @@ -24,6 +25,7 @@
clamp_nonzero,
copy_bytes,
ensure_in_memory,
eval_once_check,
eval_seq,
get_bytearray_length,
get_element_ptr,
Expand Down Expand Up @@ -1247,7 +1249,9 @@ class SelfDestruct(BuiltinFunction):
@process_inputs
def build_IR(self, expr, args, kwargs, context):
    """Build the IR for a `selfdestruct` call.

    The `selfdestruct` operation is emitted inside a `seq` together with
    an `eval_once_check`, so that a codegen bug which duplicates this
    side-effecting operation surfaces as a duplicate label exception
    during IR-to-assembly rather than as silently repeated code.

    `args[0]` is passed through as the single operand of `selfdestruct`.
    """
    # NOTE(review): presumably rejects use from a constant context --
    # confirm against `check_is_not_constant`.
    context.check_is_not_constant("selfdestruct", expr)

    # the guard must be part of the returned node; returning the bare
    # ["selfdestruct", ...] node first would leave the check unreachable.
    return IRnode.from_list(
        ["seq", eval_once_check(_freshname("selfdestruct")), ["selfdestruct", args[0]]]
    )


class BlockHash(BuiltinFunction):
Expand Down Expand Up @@ -1583,7 +1587,9 @@ def _create_ir(value, buf, length, salt=None, checked=True):
create_op = "create2"
args.append(salt)

ret = IRnode.from_list([create_op, *args])
ret = IRnode.from_list(
["seq", eval_once_check(_freshname("create_builtin")), [create_op, *args]]
)

if not checked:
return ret
Expand Down
18 changes: 17 additions & 1 deletion vyper/codegen/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,13 +526,24 @@ def LOAD(ptr: IRnode) -> IRnode:
return IRnode.from_list([op, ptr])


def eval_once_check(name):
    """Return a `unique_symbol` IR node carrying `name`.

    Include the node alongside a side-effecting operation as a sanity
    check that the codegen pipeline only generates that operation once;
    otherwise IR-to-assembly will throw a duplicate label exception.
    There is no runtime overhead, since the jumpdest gets optimized out
    in the final stage of assembly.
    """
    uniqueness_marker = ["unique_symbol", name]
    return IRnode.from_list(uniqueness_marker)


def STORE(ptr: IRnode, val: IRnode) -> IRnode:
    """Build the IR which stores `val` at the location `ptr` points to.

    The store opcode is read from `ptr.location.store_op`. The store is
    wrapped in a `seq` together with an `eval_once_check`, so that a
    codegen bug which emits the same store twice is caught as a duplicate
    label exception during IR-to-assembly.

    Raises:
        CompilerPanic: if `ptr` has no location (it is not a pointer), or
            if its location defines no store opcode.
    """
    if ptr.location is None:
        raise CompilerPanic("cannot dereference non-pointer type")
    op = ptr.location.store_op
    if op is None:
        raise CompilerPanic(f"unreachable {ptr.location}")  # pragma: notest

    # the guard has to be part of the returned node; returning the bare
    # [op, ptr, val] node before this point would make the check dead code.
    _check = _freshname(f"{op}_")
    return IRnode.from_list(["seq", eval_once_check(_check), [op, ptr, val]])


# Unwrap location
Expand Down Expand Up @@ -706,6 +717,11 @@ def _freshname(name):
return f"{name}{_label}"


def reset_names():
    """Set the module-level `_label` value back to 0."""
    global _label
    _label = 0


# returns True if t is ABI encoded and is a type that needs any kind of
# validation
def needs_clamp(t, encoding):
Expand Down
7 changes: 7 additions & 0 deletions vyper/codegen/external_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
from vyper.address_space import MEMORY
from vyper.codegen.abi_encoder import abi_encode
from vyper.codegen.core import (
_freshname,
calculate_type_for_external_return,
check_assign,
check_external_call,
dummy_node_for_type,
eval_once_check,
make_setter,
needs_clamp,
unwrap_location,
Expand Down Expand Up @@ -178,6 +180,11 @@ def _external_call_helper(contract_address, args_ir, call_kwargs, call_expr, con

ret = ["seq"]

# this is a sanity check to prevent double evaluation of the external call
# in the codegen pipeline. if the external call gets doubly evaluated,
# a duplicate label exception will get thrown during assembly.
ret.append(eval_once_check(_freshname(call_expr.node_source_code)))

buf, arg_packer, args_ofst, args_len = _pack_arguments(fn_type, args_ir, context)

ret_unpacker, ret_ofst, ret_len = _unpack_returndata(
Expand Down
27 changes: 27 additions & 0 deletions vyper/codegen/ir_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,16 @@ def _check(condition, err):
raise CodegenPanic(f"2nd argument to label must be var_list, {self}")
self.valency = 0
self._gas = 1 + sum(t.gas for t in self.args)
elif self.value == "unique_symbol":
# a label which enforces uniqueness, and does not appear
# in generated bytecode. this is useful for generating
# internal assertions that a particular IR fragment only
# occurs a single time in a program. note that unique_symbol
# must be distinct from all `unique_symbol`s AS WELL AS all
# `label`s, otherwise IR-to-assembly will raise an exception.
self.valency = 0
self._gas = 0

# var_list names a variable number of stack variables
elif self.value == "var_list":
for arg in self.args:
Expand All @@ -291,6 +301,7 @@ def _check(condition, err):
self._gas = sum([arg.gas for arg in self.args])
elif self.value == "deploy":
self.valency = 0
_check(len(self.args) == 3, f"`deploy` should have three args {self}")
self._gas = NullAttractor() # unknown
# Stack variables
else:
Expand Down Expand Up @@ -324,6 +335,22 @@ def is_complex_ir(self):
and self.value.lower() not in do_not_cache
)

# unused, but might be useful for analysis at some point
def unique_symbols(self):
    """Collect the names of all `unique_symbol` nodes in this subtree.

    Returns a set of the symbol names found on this node and, recursively,
    on its children. For a `deploy` node only the first and third
    arguments are visited. Raises AssertionError if a child subtree
    contributes a name that has already been collected.
    """
    if self.value == "unique_symbol":
        seen = {self.args[0].value}
    else:
        seen = set()

    if self.value == "deploy":
        subnodes = [self.args[0], self.args[2]]
    else:
        subnodes = self.args

    for subnode in subnodes:
        found = subnode.unique_symbols()
        non_uniques = seen.intersection(found)
        assert not non_uniques, f"non-unique symbols {non_uniques}"
        seen.update(found)
    return seen

@property
def is_literal(self):
return isinstance(self.value, int) or self.value == "multi"
Expand Down
6 changes: 4 additions & 2 deletions vyper/codegen/self_call.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from vyper.address_space import MEMORY
from vyper.codegen.core import make_setter
from vyper.codegen.core import _freshname, eval_once_check, make_setter
from vyper.codegen.ir_node import IRnode, push_label_to_stack
from vyper.codegen.types import TupleType
from vyper.exceptions import StateAccessViolation, StructureException
Expand Down Expand Up @@ -91,7 +91,9 @@ def ir_for_self_call(stmt_expr, context):
# pass return label to subroutine
goto_op += [push_label_to_stack(return_label)]

call_sequence = ["seq", copy_args, goto_op, ["label", return_label, ["var_list"], "pass"]]
call_sequence = ["seq"]
call_sequence.append(eval_once_check(_freshname(stmt_expr.node_source_code)))
call_sequence.extend([copy_args, goto_op, ["label", return_label, ["var_list"], "pass"]])
if return_buffer is not None:
# push return buffer location to stack
call_sequence += [return_buffer]
Expand Down
6 changes: 5 additions & 1 deletion vyper/compiler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from collections import OrderedDict
from typing import Any, Callable, Dict, Optional, Sequence, Union

from vyper.compiler import output
import vyper.ast as vy_ast # break an import cycle
import vyper.codegen.core as codegen
import vyper.compiler.output as output
from vyper.compiler.phases import CompilerData
from vyper.evm.opcodes import DEFAULT_EVM_VERSION, evm_wrapper
from vyper.typing import (
Expand Down Expand Up @@ -112,6 +114,8 @@ def compile_codes(
):
interfaces = interfaces[contract_name]

# make IR output the same between runs
codegen.reset_names()
compiler_data = CompilerData(
source_code,
contract_name,
Expand Down
2 changes: 1 addition & 1 deletion vyper/compiler/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _build_asm(asm_list):
for node in asm_list:

if isinstance(node, list):
output_string += "[ " + _build_asm(node) + "] "
output_string += "{ " + _build_asm(node) + "} "
continue

if in_push > 0:
Expand Down
Loading