Skip to content

Commit

Permalink
[mypyc] Remove unused labels and gotos (#15244)
Browse files Browse the repository at this point in the history
This PR removes labels which are never jumped to, as well as removing
gotos that jump directly to the next label (and removes the now
redundant label). I had a small issue with the `GetAttr` op code as it
would insert C code after the end of the branch which could not be
detected at the IR level, as seen in
mypyc/mypyc#600 (comment) .

Here are some basic stats on the file sizes before and after this PR:

Building with `MYPY_OPT_LEVEL=0`, `MYPY_DEBUG_LEVEL=1`:

| Branch   | `.so` size      | `.c` size       | `.c` lines      |
|----------|-----------------|-----------------|-----------------|
| `master` | 43.9 MB         | 79.7 MB         | 2290675         |
| This PR  | 43.6 MB (-0.6%) | 78.6 MB (-1.4%) | 2179967 (-4.8%) |

Building with `MYPY_OPT_LEVEL=3`, `MYPY_DEBUG_LEVEL=0`:

| Branch   | `.so` size | `.c` size | `.c` lines |
|----------|------------|-----------|------------|
| `master` | 26.5 MB    | 79.7 MB   | 2291316    |
| This PR  | 26.5 MB    | 78.7 MB   | 2179701    |

(I don't know why the number of lines changes between optimization
levels. It's only a couple hundred lines, perhaps Mypyc is emitting
different code depending on optimization level?)

Unoptimized builds seem to benefit the most, while optimized builds look
pretty much the same. I didn't check to see if they are *exactly* the
same binary, but they are probably close if not the same. The biggest
win is the drop in the number of lines C code being generated.

Closes mypyc/mypyc#600
  • Loading branch information
dosisod authored May 22, 2023
1 parent 2ede35f commit 6c7e480
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 11 deletions.
3 changes: 3 additions & 0 deletions mypyc/codegen/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ def emit_label(self, label: BasicBlock | str) -> None:
if isinstance(label, str):
text = label
else:
if label.label == 0 or not label.referenced:
return

text = self.label(label)
# Extra semicolon prevents an error when the next line declares a tempvar
self.fragments.append(f"{text}: ;\n")
Expand Down
34 changes: 30 additions & 4 deletions mypyc/codegen/emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CallC,
Cast,
ComparisonOp,
ControlOp,
DecRef,
Extend,
Float,
Expand Down Expand Up @@ -123,6 +124,28 @@ def generate_native_function(
for i, block in enumerate(blocks):
block.label = i

# Find blocks that are never jumped to or are only jumped to from the
# block directly above it. This allows for more labels and gotos to be
# eliminated during code generation.
for block in fn.blocks:
terminator = block.terminator
assert isinstance(terminator, ControlOp)

for target in terminator.targets():
is_next_block = target.label == block.label + 1

# Always emit labels for GetAttr error checks since the emit code that
# generates them will add instructions between the branch and the
# next label, causing the label to be wrongly removed. A better
# solution would be to change the IR so that it adds a basic block
# inbetween the calls.
is_problematic_op = isinstance(terminator, Branch) and any(
isinstance(s, GetAttr) for s in terminator.sources()
)

if not is_next_block or is_problematic_op:
fn.blocks[target.label].referenced = True

common = frequently_executed_blocks(fn.blocks[0])

for i in range(len(blocks)):
Expand Down Expand Up @@ -216,17 +239,20 @@ def visit_branch(self, op: Branch) -> None:

if false is self.next_block:
if op.traceback_entry is None:
self.emit_line(f"if ({cond}) goto {self.label(true)};")
if true is not self.next_block:
self.emit_line(f"if ({cond}) goto {self.label(true)};")
else:
self.emit_line(f"if ({cond}) {{")
self.emit_traceback(op)
self.emit_lines("goto %s;" % self.label(true), "}")
else:
self.emit_line(f"if ({cond}) {{")
self.emit_traceback(op)
self.emit_lines(
"goto %s;" % self.label(true), "} else", " goto %s;" % self.label(false)
)

if true is not self.next_block:
self.emit_line("goto %s;" % self.label(true))

self.emit_lines("} else", " goto %s;" % self.label(false))

def visit_return(self, op: Return) -> None:
value_str = self.reg(op.value)
Expand Down
1 change: 1 addition & 0 deletions mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __init__(self, label: int = -1) -> None:
self.label = label
self.ops: list[Op] = []
self.error_handler: BasicBlock | None = None
self.referenced = False

@property
def terminated(self) -> bool:
Expand Down
8 changes: 1 addition & 7 deletions mypyc/test/test_emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,12 +894,7 @@ def test_simple(self) -> None:
generate_native_function(fn, emitter, "prog.py", "prog")
result = emitter.fragments
assert_string_arrays_equal(
[
"CPyTagged CPyDef_myfunc(CPyTagged cpy_r_arg) {\n",
"CPyL0: ;\n",
" return cpy_r_arg;\n",
"}\n",
],
["CPyTagged CPyDef_myfunc(CPyTagged cpy_r_arg) {\n", " return cpy_r_arg;\n", "}\n"],
result,
msg="Generated code invalid",
)
Expand All @@ -922,7 +917,6 @@ def test_register(self) -> None:
[
"PyObject *CPyDef_myfunc(CPyTagged cpy_r_arg) {\n",
" CPyTagged cpy_r_r0;\n",
"CPyL0: ;\n",
" cpy_r_r0 = 10;\n",
" CPy_Unreachable();\n",
"}\n",
Expand Down

0 comments on commit 6c7e480

Please sign in to comment.