From 8b7f9472d6bba0fd13690e924c57a836719601f6 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 11 Dec 2020 11:00:59 -0500 Subject: [PATCH] optimize invokelatest call performance Previously, people resorted to work-arounds such as wrapping this call in try/catch to make it slightly faster, since the compiler could not optimize it. Applications typically shouldn't use this function in performance-sensitive places, as doing so hints that their design is flawed, but we might as well make it faster anyway. Also optimize the invokelatest kwcall path while we are at it. --- base/essentials.jl | 13 ++++++------- src/builtin_proto.h | 4 ++-- src/builtins.c | 19 +++++++++---------- src/codegen.cpp | 4 ++-- src/jl_exported_funcs.inc | 6 +++--- src/jlfrontend.scm | 4 ++-- src/staticdata.c | 2 +- test/cmdlineargs.jl | 12 ++++++------ 8 files changed, 31 insertions(+), 33 deletions(-) diff --git a/base/essentials.jl b/base/essentials.jl index 31c746bab5d28..aba10a1a011c6 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -703,12 +703,11 @@ call obsolete versions of a function `f`. `f` directly, and the type of the result cannot be inferred by the compiler.) """ function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...) + kwargs = Base.merge(NamedTuple(), kwargs) if isempty(kwargs) - return Core._apply_latest(f, args) + return Core._call_latest(f, args...) end - # We use a closure (`inner`) to handle kwargs. - inner() = f(args...; kwargs...) - Core._apply_latest(inner) + return Core._call_latest(Core.kwfunc(f), kwargs, f, args...) end """ @@ -738,11 +737,11 @@ of [`invokelatest`](@ref). world age refers to system state unrelated to the main Julia session. """ function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; kwargs...) + kwargs = Base.merge(NamedTuple(), kwargs) if isempty(kwargs) - return Core._apply_in_world(world, f, args) + return Core._call_in_world(world, f, args...) end - inner() = f(args...; kwargs...) 
- Core._apply_in_world(world, inner) + return Core._call_in_world(world, Core.kwfunc(f), kwargs, f, args...) end # TODO: possibly make this an intrinsic diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 8021c404bd5e7..e66af64eb4118 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -23,8 +23,8 @@ DECLARE_BUILTIN(throw); DECLARE_BUILTIN(is); DECLARE_BUILTIN(typeof); DECLARE_BUILTIN(sizeof); DECLARE_BUILTIN(issubtype); DECLARE_BUILTIN(isa); DECLARE_BUILTIN(_apply); DECLARE_BUILTIN(_apply_pure); -DECLARE_BUILTIN(_apply_latest); DECLARE_BUILTIN(_apply_iterate); -DECLARE_BUILTIN(_apply_in_world); +DECLARE_BUILTIN(_call_latest); DECLARE_BUILTIN(_apply_iterate); +DECLARE_BUILTIN(_call_in_world); DECLARE_BUILTIN(isdefined); DECLARE_BUILTIN(nfields); DECLARE_BUILTIN(tuple); DECLARE_BUILTIN(svec); DECLARE_BUILTIN(getfield); DECLARE_BUILTIN(setfield); diff --git a/src/builtins.c b/src/builtins.c index 8cbce6c5b6188..de5318b59295e 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -712,21 +712,21 @@ JL_CALLABLE(jl_f__apply_pure) return ret; } -// this is like `_apply`, but always runs in the newest world -JL_CALLABLE(jl_f__apply_latest) +// this is like a regular call, but always runs in the newest world +JL_CALLABLE(jl_f__call_latest) { jl_ptls_t ptls = jl_get_ptls_states(); size_t last_age = ptls->world_age; if (!ptls->in_pure_callback) ptls->world_age = jl_world_counter; - jl_value_t *ret = jl_f__apply(NULL, args, nargs); + jl_value_t *ret = jl_apply(args, nargs); ptls->world_age = last_age; return ret; } -// Like `_apply`, but runs in the specified world. +// Like `_call_latest`, but runs in the specified world. // If world > jl_world_counter, run in the latest world. 
-JL_CALLABLE(jl_f__apply_in_world) +JL_CALLABLE(jl_f__call_in_world) { JL_NARGSV(_apply_in_world, 2); jl_ptls_t ptls = jl_get_ptls_states(); @@ -734,10 +734,9 @@ JL_CALLABLE(jl_f__apply_in_world) JL_TYPECHK(_apply_in_world, ulong, args[0]); size_t world = jl_unbox_ulong(args[0]); world = world <= jl_world_counter ? world : jl_world_counter; - if (!ptls->in_pure_callback) { + if (!ptls->in_pure_callback) ptls->world_age = world; - } - jl_value_t *ret = do_apply(NULL, args+1, nargs-1, NULL); + jl_value_t *ret = jl_apply(&args[1], nargs - 1); ptls->world_age = last_age; return ret; } @@ -1555,8 +1554,8 @@ void jl_init_primitives(void) JL_GC_DISABLED jl_builtin__expr = add_builtin_func("_expr", jl_f__expr); jl_builtin_svec = add_builtin_func("svec", jl_f_svec); add_builtin_func("_apply_pure", jl_f__apply_pure); - add_builtin_func("_apply_latest", jl_f__apply_latest); - add_builtin_func("_apply_in_world", jl_f__apply_in_world); + add_builtin_func("_call_latest", jl_f__call_latest); + add_builtin_func("_call_in_world", jl_f__call_in_world); add_builtin_func("_typevar", jl_f__typevar); add_builtin_func("_structtype", jl_f__structtype); add_builtin_func("_abstracttype", jl_f__abstracttype); diff --git a/src/codegen.cpp b/src/codegen.cpp index a52d02a99c1ea..a4359a50abe6c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -840,8 +840,8 @@ static const std::map builtin_func_map = { { &jl_f__apply, new JuliaFunction{"jl_f__apply", get_func_sig, get_func_attrs} }, { &jl_f__apply_iterate, new JuliaFunction{"jl_f__apply_iterate", get_func_sig, get_func_attrs} }, { &jl_f__apply_pure, new JuliaFunction{"jl_f__apply_pure", get_func_sig, get_func_attrs} }, - { &jl_f__apply_latest, new JuliaFunction{"jl_f__apply_latest", get_func_sig, get_func_attrs} }, - { &jl_f__apply_in_world, new JuliaFunction{"jl_f__apply_in_world", get_func_sig, get_func_attrs} }, + { &jl_f__call_latest, new JuliaFunction{"jl_f__call_latest", get_func_sig, get_func_attrs} }, + { &jl_f__call_in_world, new 
JuliaFunction{"jl_f__call_in_world", get_func_sig, get_func_attrs} }, { &jl_f_throw, new JuliaFunction{"jl_f_throw", get_func_sig, get_func_attrs} }, { &jl_f_tuple, jltuple_func }, { &jl_f_svec, new JuliaFunction{"jl_f_svec", get_func_sig, get_func_attrs} }, diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index d12e8794f5ce4..4476a736c81b0 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -178,12 +178,12 @@ XX(jl_expand_with_loc_warn) \ XX(jl_extern_c) \ XX(jl_f__abstracttype) \ - XX(jl_f_applicable) \ XX(jl_f__apply) \ - XX(jl_f__apply_in_world) \ XX(jl_f__apply_iterate) \ - XX(jl_f__apply_latest) \ XX(jl_f__apply_pure) \ + XX(jl_f__call_in_world) \ + XX(jl_f__call_latest) \ + XX(jl_f_applicable) \ XX(jl_f_apply_type) \ XX(jl_f_arrayref) \ XX(jl_f_arrayset) \ diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index 39a3adc3ee467..fb3e732d41ca0 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -191,11 +191,11 @@ (= (call include ,x) (block ,@loc - (call (core _apply_latest) (top include) (call (core svec) ,name ,x)))) + (call (core _call_latest) (top include) ,name ,x))) (= (call include (:: ,mex (top Function)) ,x) (block ,@loc - (call (core _apply_latest) (top include) (call (core svec) ,mex ,name ,x)))))) + (call (core _call_latest) (top include) ,mex ,name ,x))))) 'none 0)) ; run whole frontend on a string. useful for testing. 
diff --git a/src/staticdata.c b/src/staticdata.c index 5b63c6d4fcbe6..64a2063fff843 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -237,7 +237,7 @@ void *native_functions; static const jl_fptr_args_t id_to_fptrs[] = { &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, &jl_f_typeassert, &jl_f__apply, &jl_f__apply_iterate, &jl_f__apply_pure, - &jl_f__apply_latest, &jl_f__apply_in_world, &jl_f_isdefined, + &jl_f__call_latest, &jl_f__call_in_world, &jl_f_isdefined, &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_invoke_kwsorter, &jl_f_getfield, &jl_f_setfield, &jl_f_fieldtype, &jl_f_nfields, &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type, diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 51b5a6bc77454..60a3afcbcce66 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -316,15 +316,15 @@ let exename = `$(Base.julia_cmd()) --startup-file=no` @test popfirst!(got) == " 80 []" if Sys.WORD_SIZE == 64 # P64 pools with 64 bit tags - @test popfirst!(got) == " 32 Base.invokelatest(g, 0)" - @test popfirst!(got) == " 48 Base.invokelatest(g, x)" + @test popfirst!(got) == " 16 Base.invokelatest(g, 0)" + @test popfirst!(got) == " 32 Base.invokelatest(g, x)" elseif 12 == (() -> @allocated ccall(:jl_gc_allocobj, Ptr{Cvoid}, (Csize_t,), 8))() # See if we have a 12-byte pool with 32 bit tags (MAX_ALIGN = 4) - @test popfirst!(got) == " 24 Base.invokelatest(g, 0)" - @test popfirst!(got) == " 36 Base.invokelatest(g, x)" + @test popfirst!(got) == " 12 Base.invokelatest(g, 0)" + @test popfirst!(got) == " 24 Base.invokelatest(g, x)" else # MAX_ALIGN >= 8 - @test popfirst!(got) == " 16 Base.invokelatest(g, 0)" - @test popfirst!(got) == " 48 Base.invokelatest(g, x)" + @test popfirst!(got) == " 8 Base.invokelatest(g, 0)" + @test popfirst!(got) == " 32 Base.invokelatest(g, x)" end @test popfirst!(got) == " 80 []" @test popfirst!(got) == " - end"