Skip to content

Commit

Permalink
optimizer: supports callsite annotations of inlining, fixes #18773 (#…
Browse files Browse the repository at this point in the history
…41328)

* optimizer: supports callsite annotations of inlining, fixes #18773

Enable `@inline`/`@noinline` annotations on function callsites.
From #40754.

Now `@inline` and `@noinline` can be applied to a code block and then
the compiler will try to (not) inline calls within the block:
```julia
@inline f(...) # The compiler will try to inline `f`

@inline f(...) + g(...) # The compiler will try to inline `f`, `g` and `+`

@inline f(args...) = ... # Of course annotations on a definition is still allowed
```

Here are couple of notes on how those callsite annotations will work:
- callsite annotation always has the precedence over the annotation
  applied to the definition of the called function, whichever we use
  `@inline`/`@noinline`:
  ```julia
  @inline function explicit_inline(args...)
      # body
  end

  let
      @noinline explicit_inline(args...) # this call will not be inlined
  end
  ```
- when callsite annotations are nested, the innermost annotations has
  the precedence
  ```julia
  @noinline let a0, b0 = ...
      a = @inline f(a0)  # the compiler will try to inline this call
      b = notinlined(b0) # the compiler will NOT try to inline this call
      return a, b
  end
  ```
They're both tested and included in documentations.

* set ssaflags on `CodeInfo` construction

* try to keep source if it will be force-inlined

* give inlining when source isn't available

* style nits

* Update base/compiler/ssair/inlining.jl

Co-authored-by: Jameson Nash <[email protected]>

* Update src/method.c

Co-authored-by: Jameson Nash <[email protected]>

* fixup

- remove preprocessed flags from `jl_code_info_set_ir`
- fix duplicated definition warning
- add and fix comments

* more clean up

* add caveat about the recursive call limitation

* update NEWS.md

Co-authored-by: Jameson Nash <[email protected]>
  • Loading branch information
aviatesk and vtjnash authored Sep 1, 2021
1 parent 457764f commit 2a0ab37
Show file tree
Hide file tree
Showing 20 changed files with 402 additions and 134 deletions.
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ New language features
---------------------

* `Module(:name, false, false)` can be used to create a `module` that does not import `Core`. ([#40110])
* `@inline` and `@noinline` annotations may now be used in function bodies. ([#41312])
* `@inline` and `@noinline` annotations can be used within a function body to give an extra
hint about the inlining cost to the compiler. ([#41312])
* `@inline` and `@noinline` annotations can now be applied to a function callsite or block
to enforce the involved function calls to be (or not to be) inlined. ([#41312])
* The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551])

Language changes
Expand Down Expand Up @@ -39,7 +42,7 @@ New library features

* `@test_throws "some message" triggers_error()` can now be used to check whether the displayed error text
contains "some message" regardless of the specific exception type.
Regular expressions, lists of strings, and matching functions are also supported. ([#41888)
Regular expressions, lists of strings, and matching functions are also supported. ([#41888])

Standard library changes
------------------------
Expand Down
12 changes: 8 additions & 4 deletions base/compiler/abstractinterpretation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me
return nothing
end
mi = mi::MethodInstance
if !force && !const_prop_methodinstance_heuristic(interp, method, mi)
if !force && !const_prop_methodinstance_heuristic(interp, match, mi, sv)
add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
return nothing
end
Expand Down Expand Up @@ -696,7 +696,8 @@ end
# This is a heuristic to avoid trying to const prop through complicated functions
# where we would spend a lot of time, but are probably unlikely to get an improved
# result anyway.
function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance)
function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance, sv::InferenceState)
method = match.method
if method.is_for_opaque_closure
# Not inlining an opaque closure can be very expensive, so be generous
# with the const-prop-ability. It is quite possible that we can't infer
Expand All @@ -714,7 +715,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method
if isdefined(code, :inferred) && !cache_inlineable
cache_inf = code.inferred
if !(cache_inf === nothing)
cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing
src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv))
cache_inlineable = src !== nothing
end
end
if !cache_inlineable
Expand Down Expand Up @@ -1908,7 +1910,9 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
if isa(fname, SlotNumber)
changes = StateUpdate(fname, VarState(Any, false), changes, false)
end
elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect
elseif hd === :code_coverage_effect ||
(hd !== :boundscheck && # :boundscheck can be narrowed to Bool
hd !== nothing && is_meta_expr_head(hd))
# these do not generate code
else
t = abstract_eval_statement(interp, stmt, changes, frame)
Expand Down
2 changes: 2 additions & 0 deletions base/compiler/inferencestate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,5 @@ function print_callstack(sv::InferenceState)
sv = sv.parent
end
end

get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
72 changes: 33 additions & 39 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,28 @@ function push!(et::EdgeTracker, ci::CodeInstance)
push!(et, ci.def)
end

struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P}
struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter}
params::OptimizationParams
et::S
mi_cache::T
policy::P
interp::I
end

function default_inlining_policy(@nospecialize(src))
function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8)
if isa(src, CodeInfo) || isa(src, Vector{UInt8})
src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
return src_inferred && src_inlineable ? src : nothing
elseif isa(src, OptimizationState) && isdefined(src, :ir)
return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing
else
# maybe we want to make inference keep the source in a local cache if a statement is going to inlined
# and re-optimize it here with disabling further inlining to avoid infinite optimization loop
# (we can even naively try to re-infer it entirely)
# but it seems like that "single-level-inlining" is more trouble and complex than it's worth
# see https://github.com/JuliaLang/julia/pull/41328/commits/0fc0f71a42b8c9d04b0dafabf3f1f17703abf2e7
return nothing
end
if isa(src, OptimizationState) && isdefined(src, :ir)
return src.src.inlineable ? src.ir : nothing
end
return nothing
end

include("compiler/ssair/driver.jl")
Expand All @@ -57,7 +62,7 @@ mutable struct OptimizationState
inlining = InliningState(params,
EdgeTracker(s_edges, frame.valid_worlds),
WorldView(code_cache(interp), frame.world),
inlining_policy(interp))
interp)
return new(frame.linfo,
frame.src, nothing, frame.stmt_info, frame.mod,
frame.sptypes, frame.slottypes, false,
Expand Down Expand Up @@ -86,7 +91,7 @@ mutable struct OptimizationState
inlining = InliningState(params,
nothing,
WorldView(code_cache(interp), get_world_counter()),
inlining_policy(interp))
interp)
return new(linfo,
src, nothing, stmt_info, mod,
sptypes_from_meth_instance(linfo), slottypes, false,
Expand Down Expand Up @@ -125,9 +130,15 @@ const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
# const SLOT_CALLED = 64

# This statement was marked as @inbounds by the user. If replaced by inlining,
# any contained boundschecks may be removed
const IR_FLAG_INBOUNDS = 0x01
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c

# This statement is marked as @inbounds by user.
# Ff replaced by inlining, any contained boundschecks may be removed.
const IR_FLAG_INBOUNDS = 0x01 << 0
# This statement is marked as @inline by user
const IR_FLAG_INLINE = 0x01 << 1
# This statement is marked as @noinline by user
const IR_FLAG_NOINLINE = 0x01 << 2
# This statement may be removed if its result is unused. In particular it must
# thus be both pure and effect free.
const IR_FLAG_EFFECT_FREE = 0x01 << 4
Expand Down Expand Up @@ -173,6 +184,9 @@ function isinlineable(m::Method, me::OptimizationState, params::OptimizationPara
return inlineable
end

is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE != 0
is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE != 0

# These affect control flow within the function (so may not be removed
# if there is no usage within the function), but don't affect the purity
# of the function as a whole.
Expand Down Expand Up @@ -358,42 +372,22 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv::
end
renumber_ir_elements!(code, changemap, labelmap)

inbounds_depth = 0 # Number of stacked inbounds
meta = Any[]
flags = fill(0x00, length(code))
for i = 1:length(code)
stmt = code[i]
if isexpr(stmt, :inbounds)
arg1 = stmt.args[1]
if arg1 === true # push
inbounds_depth += 1
elseif arg1 === false # clear
inbounds_depth = 0
elseif inbounds_depth > 0 # pop
inbounds_depth -= 1
end
stmt = nothing
else
stmt = normalize(stmt, meta)
end
code[i] = stmt
if !(stmt === nothing)
if inbounds_depth > 0
flags[i] |= IR_FLAG_INBOUNDS
end
end
code[i] = remove_meta!(code[i], meta)
end
strip_trailing_junk!(ci, code, stmtinfo, flags)
strip_trailing_junk!(ci, code, stmtinfo)
cfg = compute_basic_blocks(code)
types = Any[]
stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags)
stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, ci.ssaflags)
ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes)
return ir
end

function normalize(@nospecialize(stmt), meta::Vector{Any})
function remove_meta!(@nospecialize(stmt), meta::Vector{Any})
if isa(stmt, Expr)
if stmt.head === :meta
head = stmt.head
if head === :meta
args = stmt.args
if length(args) > 0
push!(meta, stmt)
Expand Down
Loading

0 comments on commit 2a0ab37

Please sign in to comment.