diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h
index 4a61ef9591bacf..6d9bbb5586ff5c 100644
--- a/src/mono/mono/mini/interp/interp-internals.h
+++ b/src/mono/mono/mini/interp/interp-internals.h
@@ -301,9 +301,6 @@ mono_interp_error_cleanup (MonoError *error);
 gboolean
 mono_interp_is_method_multicastdelegate_invoke (MonoMethod *method);
 
-MONO_NEVER_INLINE void
-mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs *clause_args);
-
 #if HOST_BROWSER
 
 gboolean
diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 0a2b6e2770285d..1c93cac1dc6b25 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -102,6 +102,9 @@ struct FrameClauseArgs {
 	gboolean run_until_end;
 };
 
+static MONO_NEVER_INLINE void
+mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs *clause_args);
+
 /*
  * This code synchronizes with interp_mark_stack () using compiler memory barriers.
  */
@@ -3698,7 +3701,7 @@ max_d (double lhs, double rhs)
  * to return error information.
  * FRAME is only valid until the next call to alloc_frame ().
  */
-MONO_NEVER_INLINE void
+static MONO_NEVER_INLINE void
 mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs *clause_args)
 {
 	InterpMethod *cmethod;
@@ -3797,6 +3800,11 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
 		MINT_IN_CASE(MINT_DEF)
 		MINT_IN_CASE(MINT_DUMMY_USE)
 		MINT_IN_CASE(MINT_TIER_PATCHPOINT_DATA)
+#ifndef HOST_BROWSER
+		MINT_IN_CASE(MINT_TIER_NOP_JITERPRETER)
+		MINT_IN_CASE(MINT_TIER_PREPARE_JITERPRETER)
+		MINT_IN_CASE(MINT_TIER_ENTER_JITERPRETER)
+#endif
 			g_assert_not_reached ();
 			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_BREAK)
@@ -7518,6 +7526,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			ip += 5;
 			MINT_IN_BREAK;
 		}
+
 #ifdef HOST_BROWSER
 		MINT_IN_CASE(MINT_TIER_NOP_JITERPRETER) {
 			ip += 3;
@@ -7608,21 +7617,6 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			ip = (guint16*) (((guint8*)ip) + offset);
 			MINT_IN_BREAK;
 		}
-#else
-		MINT_IN_CASE(MINT_TIER_NOP_JITERPRETER) {
-			g_assert_not_reached ();
-			MINT_IN_BREAK;
-		}
-
-		MINT_IN_CASE(MINT_TIER_PREPARE_JITERPRETER) {
-			g_assert_not_reached ();
-			MINT_IN_BREAK;
-		}
-
-		MINT_IN_CASE(MINT_TIER_ENTER_JITERPRETER) {
-			g_assert_not_reached ();
-			MINT_IN_BREAK;
-		}
 #endif
 
 #if !USE_COMPUTED_GOTO
@@ -8611,4 +8605,59 @@ mono_interp_is_method_multicastdelegate_invoke (MonoMethod *method)
 {
 	return is_method_multicastdelegate_invoke (method);
 }
+
+// after interp_entry_prologue the wrapper will set up all the argument values
+// in the correct place and compute the stack offset, then it passes that in to this
+// function in order to actually enter the interpreter and process the return value
+EMSCRIPTEN_KEEPALIVE void
+mono_jiterp_interp_entry (JiterpEntryData *_data, stackval *sp_args, void *res)
+{
+	JiterpEntryDataHeader header;
+	MonoType *type;
+
+	// Copy the scratch buffer into a local variable. This is necessary for us to be
+	// reentrant-safe because mono_interp_exec_method could end up hitting the trampoline
+	// again
+	g_assert(_data);
+	header = _data->header;
+
+	g_assert(header.rmethod);
+	g_assert(header.rmethod->method);
+	g_assert(sp_args);
+
+	stackval *sp = (stackval*)header.context->stack_pointer;
+
+	InterpFrame frame = {0};
+	frame.imethod = header.rmethod;
+	frame.stack = sp;
+	frame.retval = sp;
+
+	header.context->stack_pointer = (guchar*)sp_args;
+	g_assert ((guchar*)sp_args < header.context->stack_end);
+
+	MONO_ENTER_GC_UNSAFE;
+	mono_interp_exec_method (&frame, header.context, NULL);
+	MONO_EXIT_GC_UNSAFE;
+
+	header.context->stack_pointer = (guchar*)sp;
+
+	if (header.rmethod->needs_thread_attach)
+		mono_threads_detach_coop (header.orig_domain, &header.attach_cookie);
+
+	mono_jiterp_check_pending_unwind (header.context);
+
+	if (mono_llvm_only) {
+		if (header.context->has_resume_state)
+			/* The exception will be handled in a frame above us */
+			mono_llvm_cpp_throw_exception ();
+	} else {
+		g_assert (!header.context->has_resume_state);
+	}
+
+	// The return value is at the bottom of the stack, after the locals space
+	type = header.rmethod->rtype;
+	if (type->type != MONO_TYPE_VOID)
+		mono_jiterp_stackval_to_data (type, frame.stack, res);
+}
+
 #endif
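A note on the by-value `header = _data->header` copy in the function above: each jitted interp_entry wrapper owns a single heap-allocated JiterpEntryData scratch buffer that is reused across calls, so if the interpreted method re-enters the same wrapper, the buffer is overwritten mid-call. A minimal standalone sketch of that hazard, with hypothetical names standing in for the runtime types:

    #include <assert.h>

    // One scratch buffer per wrapper, reused across calls (like JiterpEntryData);
    // "Header" and "entry" are illustrative, not runtime names.
    typedef struct { int rmethod; } Header;
    static struct { Header header; } scratch;

    static int entry (int rmethod, int depth) {
        scratch.header.rmethod = rmethod; // the wrapper fills the shared buffer
        Header header = scratch.header;   // copy first, as mono_jiterp_interp_entry does
        if (depth > 0)
            entry (rmethod + 1, depth - 1); // reentrancy clobbers `scratch`
        // the local copy is intact; reading scratch.header here would see the
        // values written by the nested call instead of our own
        assert (header.rmethod == rmethod);
        return header.rmethod;
    }

    int main (void) {
        entry (1, 3);
        return 0;
    }
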
diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c
index 071040ee76b502..4c6105548a170a 100644
--- a/src/mono/mono/mini/interp/jiterpreter.c
+++ b/src/mono/mono/mini/interp/jiterpreter.c
@@ -35,6 +35,7 @@ void jiterp_preserve_module (void);
 #include "interp-intrins.h"
 #include "tiering.h"
 
+#include
 #include
 #include
 #include
@@ -437,6 +438,43 @@ mono_jiterp_conv_ovf (void *dest, void *src, int opcode) {
 	return 0;
 }
 
+#define JITERP_RELOP(opcode, type, op, noorder) \
+	case opcode: \
+		{ \
+			if (is_unordered) \
+				return noorder; \
+			else \
+				return ((type)lhs op (type)rhs); \
+		}
+
+EMSCRIPTEN_KEEPALIVE int
+mono_jiterp_relop_fp (double lhs, double rhs, int opcode) {
+	gboolean is_unordered = mono_isunordered (lhs, rhs);
+	switch (opcode) {
+		JITERP_RELOP(MINT_CEQ_R4, float, ==, 0);
+		JITERP_RELOP(MINT_CEQ_R8, double, ==, 0);
+		JITERP_RELOP(MINT_CNE_R4, float, !=, 1);
+		JITERP_RELOP(MINT_CNE_R8, double, !=, 1);
+		JITERP_RELOP(MINT_CGT_R4, float, >, 0);
+		JITERP_RELOP(MINT_CGT_R8, double, >, 0);
+		JITERP_RELOP(MINT_CGE_R4, float, >=, 0);
+		JITERP_RELOP(MINT_CGE_R8, double, >=, 0);
+		JITERP_RELOP(MINT_CGT_UN_R4, float, >, 1);
+		JITERP_RELOP(MINT_CGT_UN_R8, double, >, 1);
+		JITERP_RELOP(MINT_CLT_R4, float, <, 0);
+		JITERP_RELOP(MINT_CLT_R8, double, <, 0);
+		JITERP_RELOP(MINT_CLT_UN_R4, float, <, 1);
+		JITERP_RELOP(MINT_CLT_UN_R8, double, <, 1);
+		JITERP_RELOP(MINT_CLE_R4, float, <=, 0);
+		JITERP_RELOP(MINT_CLE_R8, double, <=, 0);
+
+		default:
+			g_assert_not_reached();
+	}
+}
+
+#undef JITERP_RELOP
+
 // we use these helpers at JIT time to figure out where to do memory loads and stores
 EMSCRIPTEN_KEEPALIVE size_t
 mono_jiterp_get_offset_of_vtable_initialized_flag () {
@@ -518,34 +556,6 @@ mono_jiterp_adjust_abort_count (MintOpcode opcode, gint32 delta) {
 	return jiterpreter_abort_counts[opcode];
 }
 
-typedef struct {
-	InterpMethod *rmethod;
-	ThreadContext *context;
-	gpointer orig_domain;
-	gpointer attach_cookie;
-} JiterpEntryDataHeader;
-
-// we optimize delegate calls by attempting to cache the delegate invoke
-// target - this will improve performance when the same delegate is invoked
-// repeatedly inside a loop
-typedef struct {
-	MonoDelegate *delegate_invoke_is_for;
-	MonoMethod *delegate_invoke;
-	InterpMethod *delegate_invoke_rmethod;
-} JiterpEntryDataCache;
-
-// jitted interp_entry wrappers use custom tracking data structures
-// that are allocated in the heap, one per wrapper
-// FIXME: For thread safety we need to make these thread-local or stack-allocated
-// Note that if we stack allocate these the cache will need to move somewhere else
-typedef struct {
-	// We split the cache out from the important data so that when
-	// jiterp_interp_entry copies the important data it doesn't have
-	// to also copy the cache. This reduces overhead slightly
-	JiterpEntryDataHeader header;
-	JiterpEntryDataCache cache;
-} JiterpEntryData;
-
 // at the start of a jitted interp_entry wrapper, this is called to perform initial setup
 // like resolving the target for delegates and setting up the thread context
 // inlining this into the wrappers would make them unnecessarily big and complex
@@ -604,60 +614,6 @@ mono_jiterp_interp_entry_prologue (JiterpEntryData *data, void *this_arg)
 	return sp_args;
 }
 
-// after interp_entry_prologue the wrapper will set up all the argument values
-// in the correct place and compute the stack offset, then it passes that in to this
-// function in order to actually enter the interpreter and process the return value
-EMSCRIPTEN_KEEPALIVE void
-mono_jiterp_interp_entry (JiterpEntryData *_data, stackval *sp_args, void *res)
-{
-	JiterpEntryDataHeader header;
-	MonoType *type;
-
-	// Copy the scratch buffer into a local variable. This is necessary for us to be
-	// reentrant-safe because mono_interp_exec_method could end up hitting the trampoline
-	// again
-	jiterp_assert(_data);
-	header = _data->header;
-
-	jiterp_assert(header.rmethod);
-	jiterp_assert(header.rmethod->method);
-	jiterp_assert(sp_args);
-
-	stackval *sp = (stackval*)header.context->stack_pointer;
-
-	InterpFrame frame = {0};
-	frame.imethod = header.rmethod;
-	frame.stack = sp;
-	frame.retval = sp;
-
-	header.context->stack_pointer = (guchar*)sp_args;
-	g_assert ((guchar*)sp_args < header.context->stack_end);
-
-	MONO_ENTER_GC_UNSAFE;
-	mono_interp_exec_method (&frame, header.context, NULL);
-	MONO_EXIT_GC_UNSAFE;
-
-	header.context->stack_pointer = (guchar*)sp;
-
-	if (header.rmethod->needs_thread_attach)
-		mono_threads_detach_coop (header.orig_domain, &header.attach_cookie);
-
-	mono_jiterp_check_pending_unwind (header.context);
-
-	if (mono_llvm_only) {
-		if (header.context->has_resume_state)
-			/* The exception will be handled in a frame above us */
-			mono_llvm_cpp_throw_exception ();
-	} else {
-		g_assert (!header.context->has_resume_state);
-	}
-
-	// The return value is at the bottom of the stack, after the locals space
-	type = header.rmethod->rtype;
-	if (type->type != MONO_TYPE_VOID)
-		mono_jiterp_stackval_to_data (type, frame.stack, res);
-}
-
 // should_abort_trace returns one of these codes depending on the opcode and current state
 #define TRACE_IGNORE -1
 #define TRACE_CONTINUE 0
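mono_jiterp_relop_fp centralizes the IEEE 754 "unordered" rule that makes these opcodes awkward to express with plain wasm compare instructions: when either operand is NaN the comparison is unordered, and the `noorder` column of the table supplies the result (0 for the ordered compares, 1 for CNE and the _UN variants). The R4 opcodes are promoted to f64 before the call (see intrinsicFpBinops in the jiterpreter.ts diff below), which is why the helper takes doubles. A minimal standalone sketch of the semantics, using the C99 isunordered macro:

    #include <math.h>
    #include <stdio.h>
    #include <assert.h>

    // Mirrors the MINT_CGT_R8 / MINT_CGT_UN_R8 pair from the table above:
    // ordered compares answer 0 for NaN operands, the _UN variant answers 1.
    static int cgt_r8    (double lhs, double rhs) { return isunordered (lhs, rhs) ? 0 : (lhs > rhs); }
    static int cgt_un_r8 (double lhs, double rhs) { return isunordered (lhs, rhs) ? 1 : (lhs > rhs); }

    int main (void) {
        assert (cgt_r8 (2.0, 1.0) == 1);    // ordinary ordered result
        assert (cgt_r8 (NAN, 1.0) == 0);    // NaN: ordered compare fails
        assert (cgt_un_r8 (NAN, 1.0) == 1); // NaN: unordered compare succeeds
        // this asymmetry is why a bare f64_gt wasm opcode (always false on NaN)
        // cannot implement the _UN opcodes, motivating the call into C
        printf ("ok\n");
        return 0;
    }
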
diff --git a/src/mono/mono/mini/interp/jiterpreter.h b/src/mono/mono/mini/interp/jiterpreter.h
index acb7cc8002552f..6856b6d58a4be0 100644
--- a/src/mono/mono/mini/interp/jiterpreter.h
+++ b/src/mono/mono/mini/interp/jiterpreter.h
@@ -83,6 +83,41 @@ mono_jiterp_do_jit_call_indirect (
 	gpointer cb, gpointer arg, gboolean *out_thrown
 );
 
+#ifdef __MONO_MINI_INTERPRETER_INTERNALS_H__
+
+typedef struct {
+	InterpMethod *rmethod;
+	ThreadContext *context;
+	gpointer orig_domain;
+	gpointer attach_cookie;
+} JiterpEntryDataHeader;
+
+// we optimize delegate calls by attempting to cache the delegate invoke
+// target - this will improve performance when the same delegate is invoked
+// repeatedly inside a loop
+typedef struct {
+	MonoDelegate *delegate_invoke_is_for;
+	MonoMethod *delegate_invoke;
+	InterpMethod *delegate_invoke_rmethod;
+} JiterpEntryDataCache;
+
+// jitted interp_entry wrappers use custom tracking data structures
+// that are allocated in the heap, one per wrapper
+// FIXME: For thread safety we need to make these thread-local or stack-allocated
+// Note that if we stack allocate these the cache will need to move somewhere else
+typedef struct {
+	// We split the cache out from the important data so that when
+	// jiterp_interp_entry copies the important data it doesn't have
+	// to also copy the cache. This reduces overhead slightly
+	JiterpEntryDataHeader header;
+	JiterpEntryDataCache cache;
+} JiterpEntryData;
+
+void
+mono_jiterp_interp_entry (JiterpEntryData *_data, stackval *sp_args, void *res);
+
+#endif // __MONO_MINI_INTERPRETER_INTERNALS_H__
+
 extern WasmDoJitCall jiterpreter_do_jit_call;
 
 #endif // HOST_BROWSER
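JiterpEntryDataCache above is a single-slot memoization keyed on the delegate instance: when the same delegate is invoked repeatedly (the hot-loop case the comment describes), the slow target resolution runs once and every later call is a pointer compare. A hypothetical standalone sketch of that pattern; the names and the trivial resolver are illustrative, the real logic lives in mono_jiterp_interp_entry_prologue:

    #include <stdio.h>

    typedef struct { int id; } Delegate;
    typedef struct { const Delegate *cached_for; int cached_target; } InvokeCache;

    static int slow_resolves = 0;

    // stands in for resolving a delegate's invoke target (assumed expensive)
    static int resolve_target_slow (const Delegate *del) {
        slow_resolves++;
        return del->id * 100;
    }

    static int get_invoke_target (InvokeCache *cache, const Delegate *del) {
        if (cache->cached_for == del)
            return cache->cached_target; // hit: same delegate as last call
        cache->cached_for = del;         // miss: refill the single slot
        cache->cached_target = resolve_target_slow (del);
        return cache->cached_target;
    }

    int main (void) {
        InvokeCache cache = {0};
        Delegate d = { 7 };
        for (int i = 0; i < 1000; i++)
            get_invoke_target (&cache, &d); // one delegate invoked in a loop
        printf ("slow resolves: %d\n", slow_resolves); // prints 1
        return 0;
    }
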
diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c
index bd572d7d70bd18..d7d71ac3b6ea27 100644
--- a/src/mono/mono/mini/interp/transform.c
+++ b/src/mono/mono/mini/interp/transform.c
@@ -10071,8 +10071,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 		interp_optimize_code (td);
 		interp_alloc_offsets (td);
 #if HOST_BROWSER
-		if (mono_interp_tiering_enabled ())
-			jiterp_insert_entry_points (td);
+		jiterp_insert_entry_points (td);
 #endif
 	}
 
"math_lhs32" : "pLocals"); + builder.local(destLocal); builder.i32_const(0); let localCount = count % 4; switch (localCount) { @@ -716,45 +718,100 @@ export function append_memset_dest (builder: WasmBuilder, value: number, count: builder.appendU8(0); } +export function try_append_memmove_fast ( + builder: WasmBuilder, destLocalOffset: number, srcLocalOffset: number, + count: number, addressesOnStack: boolean +) { + let destLocal = "math_lhs32", srcLocal = "math_rhs32"; + + if (count <= 0) { + if (addressesOnStack) { + builder.appendU8(WasmOpcode.drop); + builder.appendU8(WasmOpcode.drop); + } + return true; + } + + if (count >= maxMemmoveSize) + return false; + + if (addressesOnStack) { + builder.local(srcLocal, WasmOpcode.set_local); + builder.local(destLocal, WasmOpcode.set_local); + } else { + destLocal = srcLocal = "pLocals"; + } + + let destOffset = addressesOnStack ? 0 : destLocalOffset, + srcOffset = addressesOnStack ? 0 : srcLocalOffset; + + // Do blocks of 8-byte copies first for smaller/faster code + while (count >= 8) { + builder.local(destLocal); + builder.local(srcLocal); + builder.appendU8(WasmOpcode.i64_load); + builder.appendMemarg(srcOffset, 0); + builder.appendU8(WasmOpcode.i64_store); + builder.appendMemarg(destOffset, 0); + destOffset += 8; + srcOffset += 8; + count -= 8; + } + + // Then copy the remaining 0-7 bytes + while (count >= 1) { + let loadOp : WasmOpcode, storeOp : WasmOpcode; + let localCount = count % 4; + switch (localCount) { + case 0: + // since we did %, 4 bytes turned into 0. gotta fix that up to avoid infinite loop + localCount = 4; + loadOp = WasmOpcode.i32_load; + storeOp = WasmOpcode.i32_store; + break; + default: + case 1: + localCount = 1; // silence tsc + loadOp = WasmOpcode.i32_load8_s; + storeOp = WasmOpcode.i32_store8; + break; + case 3: + case 2: + // For 3 bytes we just want to do a 2 write then a 1 + localCount = 2; + loadOp = WasmOpcode.i32_load16_s; + storeOp = WasmOpcode.i32_store16; + break; + + } + + builder.local(destLocal); + builder.local(srcLocal); + builder.appendU8(loadOp); + builder.appendMemarg(srcOffset, 0); + builder.appendU8(storeOp); + builder.appendMemarg(destOffset, 0); + srcOffset += localCount; + destOffset += localCount; + count -= localCount; + } + + return true; +} + // expects dest then source to have been pushed onto wasm stack export function append_memmove_dest_src (builder: WasmBuilder, count: number) { - // FIXME: Unroll this like memset, since we now know that the memory ops generate expensive - // function calls - switch (count) { - case 1: - builder.appendU8(WasmOpcode.i32_load8_u); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i32_store8); - builder.appendMemarg(0, 0); - return true; - case 2: - builder.appendU8(WasmOpcode.i32_load16_u); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i32_store16); - builder.appendMemarg(0, 0); - return true; - case 4: - builder.appendU8(WasmOpcode.i32_load); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i32_store); - builder.appendMemarg(0, 0); - return true; - case 8: - builder.appendU8(WasmOpcode.i64_load); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i64_store); - builder.appendMemarg(0, 0); - return true; - default: - // spec: pop n, pop s, pop d, copy n bytes from s to d - builder.i32_const(count); - // great encoding isn't it - builder.appendU8(WasmOpcode.PREFIX_sat); - builder.appendU8(10); - builder.appendU8(0); - builder.appendU8(0); - return true; - } + if (try_append_memmove_fast(builder, 0, 0, count, true)) 
diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts
index 6b7addc8e9c6e8..6d9516f0255b59 100644
--- a/src/mono/wasm/runtime/jiterpreter.ts
+++ b/src/mono/wasm/runtime/jiterpreter.ts
@@ -15,7 +15,8 @@ import {
     MintOpcodePtr, WasmValtype, WasmBuilder, addWasmFunctionPointer,
     copyIntoScratchBuffer, _now, elapsedTimes, append_memset_dest,
     append_memmove_dest_src, counters, getRawCwrap, importDef,
-    JiterpreterOptions, getOptions, recordFailure, try_append_memset_fast
+    JiterpreterOptions, getOptions, recordFailure, try_append_memset_fast,
+    try_append_memmove_fast
 } from "./jiterpreter-support";
 
 // Controls miscellaneous diagnostic output.
@@ -249,6 +250,7 @@ function getTraceImports () {
         ["ld_del_ptr", "ld_del_ptr", getRawCwrap("mono_jiterp_ld_delegate_method_ptr")],
         ["ldtsflda", "ldtsflda", getRawCwrap("mono_jiterp_ldtsflda")],
         ["conv_ovf", "conv_ovf", getRawCwrap("mono_jiterp_conv_ovf")],
+        ["relop_fp", "relop_fp", getRawCwrap("mono_jiterp_relop_fp")],
     ];
 
     if (instrumentedMethodNames.length > 0) {
@@ -489,6 +491,13 @@ function generate_wasm (
             "opcode": WasmValtype.i32,
         }, WasmValtype.i32
     );
+    builder.defineType(
+        "relop_fp", {
+            "lhs": WasmValtype.f64,
+            "rhs": WasmValtype.f64,
+            "opcode": WasmValtype.i32,
+        }, WasmValtype.i32
+    );
 
     builder.generateTypeSection();
 
@@ -1350,6 +1359,9 @@ function append_memset_local (builder: WasmBuilder, localOffset: number, value:
 }
 
 function append_memmove_local_local (builder: WasmBuilder, destLocalOffset: number, sourceLocalOffset: number, count: number) {
+    if (try_append_memmove_fast(builder, destLocalOffset, sourceLocalOffset, count, false))
+        return true;
+
     // spec: pop n, pop s, pop d, copy n bytes from s to d
     append_ldloca(builder, destLocalOffset);
     append_ldloca(builder, sourceLocalOffset);
@@ -1812,6 +1824,27 @@ const unopTable : { [opcode: number]: OpRec3 | undefined } = {
     [MintOpcode.MINT_SHR_UN_I8_IMM]: [WasmOpcode.i64_shr_u, WasmOpcode.i64_load, WasmOpcode.i64_store],
 };
 
+// HACK: Generating correct wasm for these is non-trivial so we hand them off to C.
+// The opcode specifies whether the operands need to be promoted first.
+const intrinsicFpBinops : { [opcode: number] : WasmOpcode } = {
+    [MintOpcode.MINT_CEQ_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CEQ_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CNE_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CNE_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CGT_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CGT_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CGE_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CGE_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CGT_UN_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CGT_UN_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CLT_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CLT_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CLT_UN_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CLT_UN_R8]: WasmOpcode.nop,
+    [MintOpcode.MINT_CLE_R4]: WasmOpcode.f64_promote_f32,
+    [MintOpcode.MINT_CLE_R8]: WasmOpcode.nop,
+};
+
 const binopTable : { [opcode: number]: OpRec3 | OpRec4 | undefined } = {
     [MintOpcode.MINT_ADD_I4]: [WasmOpcode.i32_add, WasmOpcode.i32_load, WasmOpcode.i32_store],
     [MintOpcode.MINT_ADD_OVF_I4]:[WasmOpcode.i32_add, WasmOpcode.i32_load, WasmOpcode.i32_store],
@@ -1882,25 +1915,6 @@ const binopTable : { [opcode: number]: OpRec3 | OpRec4 | undefined } = {
     [MintOpcode.MINT_CLE_UN_I8]: [WasmOpcode.i64_le_u, WasmOpcode.i64_load, WasmOpcode.i32_store],
     [MintOpcode.MINT_CGE_UN_I8]: [WasmOpcode.i64_ge_u, WasmOpcode.i64_load, WasmOpcode.i32_store],
 
-    [MintOpcode.MINT_CEQ_R4]: [WasmOpcode.f32_eq, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CNE_R4]: [WasmOpcode.f32_ne, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CLT_R4]: [WasmOpcode.f32_lt, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    // FIXME: What are these, semantically?
-    [MintOpcode.MINT_CLT_UN_R4]: [WasmOpcode.f32_lt, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CGT_R4]: [WasmOpcode.f32_gt, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    // FIXME
-    [MintOpcode.MINT_CGT_UN_R4]: [WasmOpcode.f32_gt, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CLE_R4]: [WasmOpcode.f32_le, WasmOpcode.f32_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CGE_R4]: [WasmOpcode.f32_ge, WasmOpcode.f32_load, WasmOpcode.i32_store],
-
-    [MintOpcode.MINT_CEQ_R8]: [WasmOpcode.f64_eq, WasmOpcode.f64_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CNE_R8]: [WasmOpcode.f64_ne, WasmOpcode.f64_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CLT_R8]: [WasmOpcode.f64_lt, WasmOpcode.f64_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CGT_R8]: [WasmOpcode.f64_gt, WasmOpcode.f64_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CLE_R8]: [WasmOpcode.f64_le, WasmOpcode.f64_load, WasmOpcode.i32_store],
-    [MintOpcode.MINT_CGE_R8]: [WasmOpcode.f64_ge, WasmOpcode.f64_load, WasmOpcode.i32_store],
-
-    // FIXME: unordered float comparisons
 };
 
 const relopbranchTable : { [opcode: number]: [comparisonOpcode: MintOpcode, immediateOpcode: WasmOpcode | false, isSafepoint: boolean] | MintOpcode | undefined } = {
@@ -1993,6 +2007,22 @@ function emit_binop (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode
         info : OpRec3 | OpRec4 | undefined,
         operandsCached = false;
 
+    const intrinsicFpBinop = intrinsicFpBinops[opcode];
+    if (intrinsicFpBinop) {
+        builder.local("pLocals");
+        const isF64 = intrinsicFpBinop == WasmOpcode.nop;
+        append_ldloc(builder, getArgU16(ip, 2), isF64 ? WasmOpcode.f64_load : WasmOpcode.f32_load);
+        if (!isF64)
+            builder.appendU8(intrinsicFpBinop);
+        append_ldloc(builder, getArgU16(ip, 3), isF64 ? WasmOpcode.f64_load : WasmOpcode.f32_load);
+        if (!isF64)
+            builder.appendU8(intrinsicFpBinop);
+        builder.i32_const(opcode);
+        builder.callImport("relop_fp");
+        append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
+        return true;
+    }
+
     switch (opcode) {
         case MintOpcode.MINT_REM_R4:
        case MintOpcode.MINT_REM_R8:
@@ -2909,9 +2939,11 @@ export function mono_interp_tier_prepare_jiterpreter (
     else
         info.hitCount++;
 
-    if (info.hitCount < mostRecentOptions.minimumTraceHitCount)
+    const minHitCount = mostRecentOptions.minimumTraceHitCount;
+
+    if (info.hitCount < minHitCount)
         return JITERPRETER_TRAINING;
-    else if (info.hitCount === mostRecentOptions.minimumTraceHitCount) {
+    else if (info.hitCount === minHitCount) {
         counters.traceCandidates++;
         let methodFullName: string | undefined;
         if (trapTraceErrors || mostRecentOptions.estimateHeat || (instrumentedMethodNames.length > 0)) {
@@ -2997,32 +3029,45 @@ export function jiterpreter_dump_stats (b?: boolean) {
             // Filter out noisy methods that we don't care about optimizing
             if (traces[i].name!.indexOf("Xunit.") >= 0)
                 continue;
+
             // FIXME: A single hot method can contain many failed traces. This creates a lot of noise
             // here and also likely indicates the jiterpreter would add a lot of overhead to it
             // Filter out aborts that aren't meaningful since it is unlikely to ever make sense
             // to fix them, either because they are rarely used or because putting them in
             // traces would not meaningfully improve performance
-            if (traces[i].abortReason && traces[i].abortReason!.startsWith("mono_icall_"))
-                continue;
-            switch (traces[i].abortReason) {
-                case "trace-too-small":
-                case "call":
-                case "callvirt.fast":
-                case "calli.nat.fast":
-                case "calli.nat":
-                case "call.delegate":
-                case "newobj":
-                case "newobj_vt":
-                case "intrins_ordinal_ignore_case_ascii":
-                case "intrins_marvin_block":
-                case "intrins_ascii_chars_to_uppercase":
-                case "switch":
-                case "call_handler.s":
-                case "rethrow":
-                case "endfinally":
-                case "end-of-body":
+            if (traces[i].abortReason) {
+                if (traces[i].abortReason!.startsWith("mono_icall_") ||
+                    traces[i].abortReason!.startsWith("ret."))
                     continue;
+
+                switch (traces[i].abortReason) {
+                    // not feasible to fix
+                    case "trace-too-small":
+                    case "call":
+                    case "callvirt.fast":
+                    case "calli.nat.fast":
+                    case "calli.nat":
+                    case "call.delegate":
+                    case "newobj":
+                    case "newobj_vt":
+                    case "newobj_slow":
+                    case "switch":
+                    case "call_handler.s":
+                    case "rethrow":
+                    case "endfinally":
+                    case "end-of-body":
+                    case "ret":
+                        continue;
+
+                    // not worth implementing / too difficult
+                    case "intrins_ordinal_ignore_case_ascii":
+                    case "intrins_marvin_block":
+                    case "intrins_ascii_chars_to_uppercase":
+                    case "newarr":
+                        continue;
+                }
             }
+
             c++;
             console.log(`${traces[i].name} @${traces[i].ip} (${traces[i].hitCount} hits) ${traces[i].abortReason}`);