diff --git a/buildspecs/core.feature b/buildspecs/core.feature index 155a8568b5b..5b5e4bf09aa 100644 --- a/buildspecs/core.feature +++ b/buildspecs/core.feature @@ -112,6 +112,7 @@ SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-excepti + diff --git a/buildspecs/j9.flags b/buildspecs/j9.flags index 152c908e30b..b5644ef97c8 100644 --- a/buildspecs/j9.flags +++ b/buildspecs/j9.flags @@ -1784,6 +1784,10 @@ Only available on zOS Enables common dependencies between OpenJ9 and OpenJDK MethodHandles. Disables common dependencies between OpenJ9 and OpenJDK MethodHandles. + + Enables MicroJIT support. + + Turns on module support diff --git a/doc/compiler/MicroJIT/DSL.md b/doc/compiler/MicroJIT/DSL.md new file mode 100644 index 00000000000..8a834d93408 --- /dev/null +++ b/doc/compiler/MicroJIT/DSL.md @@ -0,0 +1,60 @@ + + +# Overview + +MicroJIT's DSL helps new contributors to avoid making common mistakes +when writing new, or editing existing, templates. It helps developers keep +sizes of data types and number of slots separate from each other conceptually +while also keeping their own footprint small. + +# Current Macros + +The current existing DSL macros are: +- pop_single_slot + - Add the size of 1 stack slot to the computation stack pointer +- pop_dual_slot + - Add the size of 2 stack slots to the computation stack pointer +- push_single_slot + - Subtract the size of 1 stack slot from the computation stack pointer +- push_dual_slot + - Subtract the size of 2 stack slots from the computation stack pointer +- _32bit_local_to_rXX_PATCH + - Move a 32-bit value to an r11d-r15d register from a local array slot +- _64bit_local_to_rXX_PATCH + - Move a 64-bit value to a 64-bit register from a local array slot +- _32bit_local_from_rXX_PATCH + - Move a 32-bit value from an r11d-r15d register to a local array slot +- _64bit_local_from_rXX_PATCH + - Move a 64-bit value from a 64-bit register to a local array slot +- _32bit_slot_stack_to_rXX + - Move a 64-bit value to an r11-r15 register from the computation stack +- _32bit_slot_stack_to_eXX + - Move a 32-bit value to an eax-edx register from the computation stack +- _64bit_slot_stack_to_rXX + - Move a 64-bit value to a 64-bit register from the computation stack +- _32bit_slot_stack_from_rXX + - Move a 64-bit value from an r11d-r15d register to the computation stack +- _32bit_slot_stack_from_eXX + - Move a 32-bit value from an eax-edx register to the computation stack +- _64bit_slot_stack_from_rXX + - Move a 64-bit value from a 64-bit register to the computation stack \ No newline at end of file diff --git a/doc/compiler/MicroJIT/Platforms.md b/doc/compiler/MicroJIT/Platforms.md new file mode 100644 index 00000000000..17f15e4613d --- /dev/null +++ b/doc/compiler/MicroJIT/Platforms.md @@ -0,0 +1,30 @@ + + +# List of currently supported platforms + +- AMD64 (a.k.a. x86-64) + +# List of expected future platforms + +- RISC-V +- AArch64 \ No newline at end of file diff --git a/doc/compiler/MicroJIT/README.md b/doc/compiler/MicroJIT/README.md new file mode 100644 index 00000000000..2a99249dd2d --- /dev/null +++ b/doc/compiler/MicroJIT/README.md @@ -0,0 +1,74 @@ + + +# Overview + +MicroJIT is a lightweight template-based Just-In-Time (JIT) compiler that integrates +seamlessly with TR. MicroJIT is designed to be used either in-place-of TR +(where CPU resources are scarce) or to reduce the time between start-up and +TR's first compile (where CPU resources are not a constraint). 
+
+MicroJIT's entry point is at the `wrapped compile` stage of TR's compile process.
+When building with a `--enable-microjit` configuration, the VM checks whether
+a method should be compiled with MicroJIT or TR before handing the method off to
+code generation ([MicroJIT Compile Stages](Stages.md)).
+
+Being template-based, MicroJIT does not build an intermediate language (IL) the
+way that TR does. It instead performs a tight loop over the bytecodes and generates
+patchable assembly for each instance of a JVM bytecode (a sketch of this dispatch
+loop follows the topic list below). This means that porting MicroJIT to a new
+platform requires rewriting much of the code generator for that target platform.
+The current list of supported platforms is available [here](Platforms.md).
+
+MicroJIT uses a stack-based [vISA](vISA.md) as its internal model of how to execute bytecodes.
+This maps cleanly onto JVM bytecode, so MicroJIT uses the number of stack slots that
+the JVM specification requires for each JVM data type wherever the target architecture allows.
+
+To keep track of the locations of parameters and locals, as well as to create the
+required GCStackAtlas, MicroJIT makes use of [side tables](SideTables.md). These
+tables are arrays of structures, each of which represents one row of its table. Using these
+tables, and some upper bounding, MicroJIT can allocate the required number of rows
+on the stack rather than the heap, which helps MicroJIT compile faster.
+
+The templates MicroJIT uses for its code generation are written in a mix of the
+target architecture's assembly (for the NASM assembler) and assembler macros that
+abstract some of the vISA operations into a domain-specific language ([DSL](DSL.md)).
+
+To try MicroJIT, first build your JVM with a configuration that uses the `--enable-microjit`
+option (see the openj9-openjdk-jdk8 project). Then, once built, run with the
+`-Xjit:mjitEnabled=1,mjitCount=20` options. You can use values for `mjitCount` other than
+20, but peer-reviewed research shows that this value is a good estimate of the best
+count for similar template-based JITs on JVM benchmarks.
+
+MicroJIT is an experimental feature. It currently does not compile all methods, and will
+fail to compile methods with unsupported bytecodes or calling conventions. For details
+on supported bytecodes and calling conventions, see our [supported compilations](Support.md)
+documentation.
+
+# Topics
+
+- [MicroJIT Compile Stages](Stages.md)
+- [Supported Platforms](Platforms.md)
+- [vISA](vISA.md)
+- [Supporting Side Tables](SideTables.md)
+- [Domain-Specific Language](DSL.md)
+- [Supported Compilations](Support.md)
+
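The README above describes MicroJIT's core code-generation strategy: no IL, just a tight loop over the bytecode stream that copies and patches a pre-assembled template per bytecode. The C++ sketch below illustrates that dispatch shape only; `Template`, `TemplateTable`, and `generateBody` are illustrative names, not the actual MicroJIT API, and real bytecodes have variable lengths and operand patching that this sketch merely hints at.

```cpp
// Minimal sketch of a template-dispatch loop, assuming a table of
// pre-assembled code templates indexed by bytecode. Names are hypothetical.
#include <cstdint>
#include <cstddef>
#include <vector>

struct Template
   {
   const uint8_t *code;  // pre-assembled machine code for one bytecode
   size_t         size;  // size of the template in bytes
   };

// One entry per JVM bytecode value (0x00 - 0xff).
using TemplateTable = Template[256];

// Copy the template for each bytecode into the code buffer, patching
// operands (local indices, branch offsets, ...) as it goes. Returns false
// when an unsupported bytecode is found, so the caller can fail the
// MicroJIT compile and fall back to TR.
bool generateBody(const uint8_t *bytecodes, size_t length,
                  const TemplateTable &templates,
                  std::vector<uint8_t> &codeBuffer)
   {
   size_t pc = 0;
   while (pc < length)
      {
      const Template &t = templates[bytecodes[pc]];
      if (t.code == nullptr)
         return false; // unsupported bytecode: bubble the failure up
      size_t start = codeBuffer.size();
      codeBuffer.insert(codeBuffer.end(), t.code, t.code + t.size);
      // A real generator would patch operand bytes in codeBuffer[start..]
      // here, using the bytecode's operands and the side tables.
      (void)start;
      pc += 1; // simplification: real bytecodes have variable lengths
      }
   return true;
   }
```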
diff --git a/doc/compiler/MicroJIT/SideTables.md b/doc/compiler/MicroJIT/SideTables.md
new file mode 100644
index 00000000000..6e05437845b
--- /dev/null
+++ b/doc/compiler/MicroJIT/SideTables.md
@@ -0,0 +1,67 @@
+
+
+# Overview
+
+This document describes the side tables that MicroJIT uses to
+generate code while limiting heap allocations.
+
+# Register Stack
+
+The register stack records the number of stack slots held
+in each register used by TR's linkage. This allows MicroJIT
+to quickly calculate stack offsets for interpreter arguments
+on the fly.
+
+RegisterStack:
+- useRAX
+- useRSI
+- useRDX
+- useRCX
+- useXMM0
+- useXMM1
+- useXMM2
+- useXMM3
+- useXMM4
+- useXMM5
+- useXMM6
+- useXMM7
+- stackSlotsUsed
+
+# ParamTableEntry and LocalTableEntry
+
+The same entry structure is used for both tables. It contains the information
+needed to copy parameters from their linkage locations to their local
+array slots, map their locations into GCMaps, and get the size of
+their data without storing their data types.
+
+ParamTableEntry:
+| Field | Description |
+|:--------------:|:-----------------------------------------------------------------:|
+| offset | Offset into the stack for loading from the stack |
+| regNo | Register containing the parameter when called by the interpreter |
+| gcMapOffset | Offset used for lookup by the GCMap |
+| slots | Number of slots used by this variable when stored in the local array |
+| size | Number of bytes used by this variable when stored on the stack |
+| isReference | Is this variable an object reference |
+| onStack | Is this variable currently on the stack |
+| notInitialized | Is this entry uninitialized by MicroJIT |
diff --git a/doc/compiler/MicroJIT/Stages.md b/doc/compiler/MicroJIT/Stages.md
new file mode 100644
index 00000000000..62f2589c083
--- /dev/null
+++ b/doc/compiler/MicroJIT/Stages.md
@@ -0,0 +1,74 @@
+
+
+# Overview
+
+This document describes the compilation stages of MicroJIT,
+starting from the point where it diverges from TR.
+
+# MetaData
+
+At the entry point to MicroJIT (the `mjit` method of `TR::CompilationInfoPerThreadBase`),
+MicroJIT enters the first stage of compilation, building MetaData. It parses the
+method signature to learn the types of its parameters, and the bytecodes to learn the
+number and types of its locals. It then estimates an upper bound on how much space
+the compiled method will need and requests it from the allocator.
+
+During this stage, it also learns how much space to reserve on the stack for calling
+child methods, and will eventually also create a control flow graph here to support
+profiling code generation later. The code generator object is constructed and the next
+steps begin.
+
+# Pre-prologue
+
+The pre-prologue is generated first. It contains useful code snippets, jump points,
+and small pieces of meta-data used throughout the JVM that are specific to an individual
+compilation of a method.
+
+# Prologue
+
+The prologue is generated next, copying parameters into their appropriate places and updating
+tables as needed. Once these tables are filled in, the structures used by the JVM are created,
+namely the `GCStackAtlas` and its initial GC Maps. At this point, the required sizes of the
+stack and local array are known, so the code generator can
+generate the code for allocating the needed stack space, moving the parameters into their
+local array slots, and setting the Computation Stack and Local Array pointers (see [vISA](vISA.md)).
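As a companion to the `ParamTableEntry` fields listed in SideTables.md and the upper-bound estimate described in the MetaData stage above, here is a hedged C++ sketch of how an upper-bounded side table can live on the compilation thread's stack instead of the heap. The struct mirrors the documented fields, but the field types, the `MAX_PARAMS` bound, and `buildParamTable` are assumptions for illustration, not MicroJIT's actual declarations.

```cpp
// Sketch only: a ParamTableEntry-like row with the fields documented in
// SideTables.md. Field types and the MAX_PARAMS bound are assumptions.
#include <cstdint>
#include <cstddef>

struct ParamTableEntry
   {
   int32_t offset;         // offset into the stack for loading from the stack
   int32_t regNo;          // register holding the parameter in the interpreter linkage
   int32_t gcMapOffset;    // offset used for lookup by the GC map
   int32_t slots;          // stack slots used when stored in the local array
   int32_t size;           // bytes used when stored on the stack
   bool    isReference;    // is this variable an object reference
   bool    onStack;        // is this variable currently on the stack
   bool    notInitialized; // has MicroJIT left this entry uninitialized
   };

// Hypothetical upper bound on parameter rows, used only for this sketch.
static const size_t MAX_PARAMS = 255;

bool buildParamTable(size_t paramCount)
   {
   if (paramCount > MAX_PARAMS)
      return false; // too large for the stack-allocated table; let TR handle it

   // Because the row count is bounded, the table can live in this frame's
   // stack space instead of the heap, which is part of what keeps MicroJIT
   // compilations fast.
   ParamTableEntry table[MAX_PARAMS] = {};

   for (size_t i = 0; i < paramCount; ++i)
      table[i].notInitialized = true; // rows are filled in while walking the signature

   // ... walk the method signature and fill in offsets, sizes, slots, ...
   return true;
   }
```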
+ +# Body + +The code generator then iterates in a tight loop over the bytecode instruction stream, +generating the required template for each bytecode, one at a time, and patching them as +needed. During this phase, if a bytecode that is unsupported (See [supported](support.md)) +is reached, the code generator bubbles the error up to the top level and compilation +is marked as a failure. The method is set not to be attempted with MicroJIT again, and +its compilation threshold is set to the default TR threshold. + +# Cold Area + +The snippets required by MicroJIT are generated last. These snippets can serve a +variety of purposes, but are usually cold code not expected to be executed on every +execution of the method. + +# Clean up + +The remaining meta-data structures are created, MicroJIT updates the internal counts +for compilations, and the unused space allocated for compilation in the start is reclaimed. \ No newline at end of file diff --git a/doc/compiler/MicroJIT/Support.md b/doc/compiler/MicroJIT/Support.md new file mode 100644 index 00000000000..007dfbf070f --- /dev/null +++ b/doc/compiler/MicroJIT/Support.md @@ -0,0 +1,253 @@ + + +MicroJIT does not currently support the compilation of system methods, methods which call system methods and methods which call JNI methods. +Bytecodes currently supported (marked as ✅) and unsupported (marked as ❌) by MicroJIT are: + +**Constants** +- 00 (0x00) nop ✅ +- 01 (0x01) aconst_null ❌ +- 02 (0x02) iconst_m1 ✅ +- 03 (0x03) iconst_0 ✅ +- 04 (0x04) iconst_1 ✅ +- 05 (0x05) iconst_2 ✅ +- 06 (0x06) iconst_3 ✅ +- 07 (0x07) iconst_4 ✅ +- 08 (0x08) iconst_5 ✅ +- 09 (0x09) lconst_0 ✅ +- 10 (0x0a) lconst_1 ✅ +- 11 (0x0b) fconst_0 ✅ +- 12 (0x0c) fconst_1 ✅ +- 13 (0x0d) fconst_2 ✅ +- 14 (0x0e) dconst_0 ✅ +- 15 (0x0f) dconst_1 ✅ +- 16 (0x10) bipush ✅ +- 17 (0x11) sipush ✅ +- 18 (0x12) ldc ❌ +- 19 (0x13) ldc_w ❌ +- 20 (0x14) ldc2_w ❌ + +**Loads** +- 21 (0x15) iload ✅ +- 22 (0x16) lload ✅ +- 23 (0x17) fload ✅ +- 24 (0x18) dload ✅ +- 25 (0x19) aload ✅ +- 26 (0x1a) iload_0 ✅ +- 27 (0x1b) iload_1 ✅ +- 28 (0x1c) iload_2 ✅ +- 29 (0x1d) iload_3 ✅ +- 30 (0x1e) lload_0 ✅ +- 31 (0x1f) lload_1 ✅ +- 32 (0x20) lload_2 ✅ +- 33 (0x21) lload_3 ✅ +- 34 (0x22) fload_0 ✅ +- 35 (0x23) fload_1 ✅ +- 36 (0x24) fload_2 ✅ +- 37 (0x25) fload_3 ✅ +- 38 (0x26) dload_0 ✅ +- 39 (0x27) dload_1 ✅ +- 40 (0x28) dload_2 ✅ +- 41 (0x29) dload_3 ✅ +- 42 (0x2a) aload_0 ✅ +- 43 (0x2b) aload_1 ✅ +- 44 (0x2c) aload_2 ✅ +- 45 (0x2d) aload_3 ✅ +- 46 (0x2e) iaload ❌ +- 47 (0x2f) laload ❌ +- 48 (0x30) faload ❌ +- 49 (0x31) daload ❌ +- 50 (0x32) aaload ❌ +- 51 (0x33) baload ❌ +- 52 (0x34) caload ❌ +- 53 (0x35) saload ❌ + +**Stores** +- 54 (0x36) istore ✅ +- 55 (0x37) lstore ✅ +- 56 (0x38) fstore ✅ +- 57 (0x39) dstore ✅ +- 58 (0x3a) astore ✅ +- 59 (0x3b) istore_0 ✅ +- 60 (0x3c) istore_1 ✅ +- 61 (0x3d) istore_2 ✅ +- 62 (0x3e) istore_3 ✅ +- 63 (0x3f) lstore_0 ✅ +- 64 (0x40) lstore_1 ✅ +- 65 (0x41) lstore_2 ✅ +- 66 (0x42) lstore_3 ✅ +- 67 (0x43) fstore_0 ✅ +- 68 (0x44) fstore_1 ✅ +- 69 (0x45) fstore_2 ✅ +- 70 (0x46) fstore_3 ✅ +- 71 (0x47) dstore_0 ✅ +- 72 (0x48) dstore_1 ✅ +- 73 (0x49) dstore_2 ✅ +- 74 (0x4a) dstore_3 ✅ +- 75 (0x4b) astore_0 ✅ +- 76 (0x4c) astore_1 ✅ +- 77 (0x4d) astore_2 ✅ +- 78 (0x4e) astore_3 ✅ +- 79 (0x4f) iastore ❌ +- 80 (0x50) lastore ❌ +- 81 (0x51) fastore ❌ +- 82 (0x52) dastore ❌ +- 83 (0x53) aastore ❌ +- 84 (0x54) bastore ❌ +- 85 (0x55) castore ❌ +- 86 (0x56) sastore ❌ + +**Stack** +- 87 (0x57) pop ✅ +- 88 (0x58) pop2 ✅ +- 89 (0x59) dup ✅ +- 90 (0x5a) dup_x1 ✅ +- 91 (0x5b) dup_x2 
✅ +- 92 (0x5c) dup2 ✅ +- 93 (0x5d) dup2_x1 ✅ +- 94 (0x5e) dup2_x2 ✅ +- 95 (0x5f) swap ✅ + +**Math** +- 96 (0x60) iadd ✅ +- 97 (0x61) ladd ✅ +- 98 (0x62) fadd ✅ +- 99 (0x63) dadd ✅ +- 100 (0x64) isub ✅ +- 101 (0x65) lsub ✅ +- 102 (0x66) fsub ✅ +- 103 (0x67) dsub ✅ +- 104 (0x68) imul ✅ +- 105 (0x69) lmul ✅ +- 106 (0x6a) fmul ✅ +- 107 (0x6b) dmul ✅ +- 108 (0x6c) idiv ✅ +- 109 (0x6d) ldiv ✅ +- 110 (0x6e) fdiv ✅ +- 111 (0x6f) ddiv ✅ +- 112 (0x70) irem ✅ +- 113 (0x71) lrem ✅ +- 114 (0x72) frem ✅ +- 115 (0x73) drem ✅ +- 116 (0x74) ineg ✅ +- 117 (0x75) lneg ✅ +- 118 (0x76) fneg ✅ +- 119 (0x77) dneg ✅ +- 120 (0x78) ishl ✅ +- 121 (0x79) lshl ✅ +- 122 (0x7a) ishr ✅ +- 123 (0x7b) lshr ✅ +- 124 (0x7c) iushr ✅ +- 125 (0x7d) lushr ✅ +- 126 (0x7e) iand ✅ +- 127 (0x7f) land ✅ +- 128 (0x80) ior ✅ +- 129 (0x81) lor ✅ +- 130 (0x82) ixor ✅ +- 131 (0x83) lxor ✅ +- 132 (0x84) iinc ✅ + +**Conversions** +- 133 (0x85) i2l ✅ +- 134 (0x86) i2f ✅ +- 135 (0x87) i2d ✅ +- 136 (0x88) l2i ✅ +- 137 (0x89) l2f ✅ +- 138 (0x8a) l2d ✅ +- 139 (0x8b) f2i ✅ +- 140 (0x8c) f2l ✅ +- 141 (0x8d) f2d ✅ +- 142 (0x8e) d2i ✅ +- 143 (0x8f) d2l ✅ +- 144 (0x90) d2f ✅ +- 145 (0x91) i2b ✅ +- 146 (0x92) i2c ✅ +- 147 (0x93) i2s ✅ + +**Comparisons** +- 148 (0x94) lcmp ✅ +- 149 (0x95) fcmpl ✅ +- 150 (0x96) fcmpg ✅ +- 151 (0x97) dcmpl ✅ +- 152 (0x98) dcmpg ✅ +- 153 (0x99) ifeq ✅ +- 154 (0x9a) ifne ✅ +- 155 (0x9b) iflt ✅ +- 156 (0x9c) ifge ✅ +- 157 (0x9d) ifgt ✅ +- 158 (0x9e) ifle ✅ +- 159 (0x9f) if_icmpeq ✅ +- 160 (0xa0) if_icmpne ✅ +- 161 (0xa1) if_icmplt ✅ +- 162 (0xa2) if_icmpge ✅ +- 163 (0xa3) if_icmpgt ✅ +- 164 (0xa4) if_icmple ✅ +- 165 (0xa5) if_acmpeq ✅ +- 166 (0xa6) if_acmpne ✅ + +**Control** +- 167 (0xa7) goto ✅ +- 168 (0xa8) jsr ❌ +- 169 (0xa9) ret ❌ +- 170 (0xaa) tableswitch ❌ +- 171 (0xab) lookupswitch ❌ +- 172 (0xac) ireturn ✅ +- 173 (0xad) lreturn ✅ +- 174 (0xae) freturn ✅ +- 175 (0xaf) dreturn ✅ +- 176 (0xb0) areturn ❌ +- 177 (0xb1) return ✅ + +**References** +- 178 (0xb2) getstatic ✅ +- 179 (0xb3) putstatic ✅ +- 180 (0xb4) getfield ✅ +- 181 (0xb5) putfield ✅ +- 182 (0xb6) invokevirtual ❌ +- 183 (0xb7) invokespecial ❌ +- 184 (0xb8) invokestatic ✅ +- 185 (0xb9) invokeinterface ❌ +- 186 (0xba) invokedynamic ❌ +- 187 (0xbb) new ❌ +- 188 (0xbc) newarray ❌ +- 189 (0xbd) anewarray ❌ +- 190 (0xbe) arraylength ❌ +- 191 (0xbf) athrow ❌ +- 192 (0xc0) checkcast ❌ +- 193 (0xc1) instanceof ❌ +- 194 (0xc2) monitorenter ❌ +- 195 (0xc3) monitorexit ❌ + +**Extended** +- 196 (0xc4) wide ❌ +- 197 (0xc5) multianewarray ❌ +- 198 (0xc6) ifnull ✅ +- 199 (0xc7) ifnonnull ✅ +- 200 (0xc8) goto_w ❌ +- 201 (0xc9) jsr_w ❌ + +**Reserved** +- 202 (0xca) breakpoint ❌ +- 254 (0xfe) impdep1 ❌ +- 255 (0xff) impdep2 ❌ + +MicroJIT is also currently known to have issue with methods that are far down the call stack which contain live object references. \ No newline at end of file diff --git a/doc/compiler/MicroJIT/vISA.md b/doc/compiler/MicroJIT/vISA.md new file mode 100644 index 00000000000..16b920fa7af --- /dev/null +++ b/doc/compiler/MicroJIT/vISA.md @@ -0,0 +1,45 @@ + + +# Overview + +MicroJIT uses a stack-based virtual ISA. This means values are pushed onto +the computation stack, popped off the computation stack, saved and +loaded from a local array, and that operands for a given instruction +are either in the instruction stream, or on the computation stack. 
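The paragraph above describes the stack-based vISA, and the DSL macros documented earlier adjust the computation stack pointer in whole slots (push subtracts, pop adds, so the stack grows downward). The C++ sketch below models that slot arithmetic on a plain byte buffer; the 8-byte slot size and the `CompStack`/`modelIadd` names are assumptions for illustration only, not MicroJIT code.

```cpp
// Toy model of the vISA computation stack: a downward-growing stack of
// fixed-size slots, mirroring the pop_*/push_* slot macros in the DSL.
#include <cstdint>
#include <cassert>

static const intptr_t SLOT_SIZE = 8; // assumed size of one stack slot in bytes

struct CompStack
   {
   uint8_t *base; // stack extent (lowest usable address)
   uint8_t *top;  // models the computation stack pointer (r10 in the AMD64 mapping below)

   // push_single_slot / push_dual_slot: subtract slots from the stack pointer.
   uint8_t *pushSlots(int slots)
      {
      top -= slots * SLOT_SIZE;
      assert(top >= base);
      return top;
      }

   // pop_single_slot / pop_dual_slot: add slots back to the stack pointer.
   uint8_t *popSlots(int slots)
      {
      uint8_t *value = top;
      top += slots * SLOT_SIZE;
      return value;
      }
   };

// Example: an iadd-like operation pops two single-slot ints and pushes one.
int32_t modelIadd(CompStack &stack)
   {
   int32_t rhs = *reinterpret_cast<int32_t *>(stack.popSlots(1));
   int32_t lhs = *reinterpret_cast<int32_t *>(stack.popSlots(1));
   int32_t sum = lhs + rhs;
   *reinterpret_cast<int32_t *>(stack.pushSlots(1)) = sum;
   return sum;
   }
```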
+ +MicroJIT is implemented on register-based architectures, and makes use +of those registers as pointers onto the stack and temporary holding +cells for values either being used by an operation, or being saved +and loaded to and from the local array. Below are the mappings for +the supported architectures. + +# AMD64 +| Register | Description | +|:--------:|:------------------------------------------------------------------------------------------------:| +| rsp | The stack extent of the computation stack pointer | +| r10 | The computation stack pointer | +| r11 | Storing the accumulator or pointer to an object | +| r12 | Storing values that act on the accumulator, are to be written to, or have been read from a field | +| r13 | Holds addresses for absolute addressing | +| r14 | The pointer to the start of the local array | +| r15 | Values loaded from memory for storing on the stack or in the local array | \ No newline at end of file diff --git a/runtime/compiler/CMakeLists.txt b/runtime/compiler/CMakeLists.txt index 2ac3bbfccbf..0065626da41 100644 --- a/runtime/compiler/CMakeLists.txt +++ b/runtime/compiler/CMakeLists.txt @@ -38,15 +38,28 @@ if(OMR_ARCH_X86) endif() endif() enable_language(ASM_NASM) - # We have to manually append "/" to the paths as NASM versions older than v2.14 requires trailing / in the directory paths. - set(asm_inc_dirs - "-I${j9vm_SOURCE_DIR}/oti/" - "-I${j9vm_BINARY_DIR}/oti/" - "-I${CMAKE_CURRENT_SOURCE_DIR}/" - "-I${CMAKE_CURRENT_SOURCE_DIR}/x/runtime/" - "-I${CMAKE_CURRENT_SOURCE_DIR}/x/amd64/runtime/" - "-I${CMAKE_CURRENT_SOURCE_DIR}/x/i386/runtime/" - ) + # We have to manually append "/" to the paths as NASM versions older than v2.14 requires trailing / in the directory paths + if(J9VM_OPT_MICROJIT) + set(asm_inc_dirs + "-I${j9vm_SOURCE_DIR}/oti/" + "-I${j9vm_BINARY_DIR}/oti/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/x/runtime/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/x/amd64/runtime/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/x/i386/runtime/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/microjit/assembly/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/microjit/x/amd64/templates/" + ) + else() + set(asm_inc_dirs + "-I${j9vm_SOURCE_DIR}/oti/" + "-I${j9vm_BINARY_DIR}/oti/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/x/runtime/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/x/amd64/runtime/" + "-I${CMAKE_CURRENT_SOURCE_DIR}/x/i386/runtime/" + ) + endif() omr_append_flags(CMAKE_ASM_NASM_FLAGS ${asm_inc_dirs}) # For whatever reason cmake does not apply compile definitions when building nasm objects. # The if guard is here in case they ever start doing so. @@ -106,6 +119,10 @@ if(J9VM_OPT_JITSERVER) include_directories(${OPENSSL_INCLUDE_DIR}) endif() +if(J9VM_OPT_MICROJIT) + message(STATUS "MicroJIT is enabled") +endif() + # TODO We should get rid of this, but it's still required by the compiler_support module in OMR. 
set(J9SRC ${j9vm_SOURCE_DIR}) @@ -146,6 +163,10 @@ if(J9VM_OPT_JITSERVER) add_subdirectory(net) endif() +if(J9VM_OPT_MICROJIT) + add_subdirectory(microjit) +endif() + if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${TR_HOST_ARCH}") add_subdirectory(${TR_HOST_ARCH}) endif() diff --git a/runtime/compiler/build/files/common.mk b/runtime/compiler/build/files/common.mk index d0efa8acf90..511fc5abe76 100644 --- a/runtime/compiler/build/files/common.mk +++ b/runtime/compiler/build/files/common.mk @@ -417,6 +417,12 @@ JIT_PRODUCT_SOURCE_FILES+=\ compiler/runtime/MetricsServer.cpp endif +ifneq ($(J9VM_OPT_MICROJIT),) +JIT_PRODUCT_SOURCE_FILES+=\ + compiler/microjit/ExceptionTable.cpp + compiler/microjit/CompilationInfoPerThreadBase.cpp +endif + -include $(JIT_MAKE_DIR)/files/extra.mk include $(JIT_MAKE_DIR)/files/host/$(HOST_ARCH).mk include $(JIT_MAKE_DIR)/files/target/$(TARGET_ARCH).mk diff --git a/runtime/compiler/build/files/target/amd64.mk b/runtime/compiler/build/files/target/amd64.mk index c77c3e7debe..3c43a82e544 100644 --- a/runtime/compiler/build/files/target/amd64.mk +++ b/runtime/compiler/build/files/target/amd64.mk @@ -1,4 +1,4 @@ -# Copyright (c) 2000, 2021 IBM Corp. and others +# Copyright (c) 2000, 2022 IBM Corp. and others # # This program and the accompanying materials are made available under # the terms of the Eclipse Public License 2.0 which accompanies this @@ -35,3 +35,12 @@ JIT_PRODUCT_SOURCE_FILES+=\ compiler/x/amd64/codegen/AMD64JNILinkage.cpp \ compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp \ compiler/x/amd64/codegen/J9CodeGenerator.cpp + +ifneq ($(J9VM_OPT_MICROJIT),) +JIT_PRODUCT_SOURCE_FILES+=\ + compiler/microjit/x/amd64/AMD64Codegen.cpp \ + compiler/microjit/x/amd64/AMD64CodegenGC.cpp \ + compiler/microjit/x/amd64/AMD64Linkage.cpp \ + compiler/microjit/x/amd64/templates/linkage.nasm \ + compiler/microjit/x/amd64/templates/bytecodes.nasm +endif diff --git a/runtime/compiler/build/toolcfg/common.mk b/runtime/compiler/build/toolcfg/common.mk index b0b74aa6f63..749a577a9e6 100644 --- a/runtime/compiler/build/toolcfg/common.mk +++ b/runtime/compiler/build/toolcfg/common.mk @@ -1,4 +1,4 @@ -# Copyright (c) 2000, 2020 IBM Corp. and others +# Copyright (c) 2000, 2022 IBM Corp. and others # # This program and the accompanying materials are made available under # the terms of the Eclipse Public License 2.0 which accompanies this @@ -43,6 +43,11 @@ PRODUCT_INCLUDES=\ $(J9SRC)/oti \ $(J9SRC)/util +ifneq ($(J9VM_OPT_MICROJIT),) + PRODUCT_INCLUDES+=\ + $(FIXED_SRCBASE)/compiler/microjit +endif + PRODUCT_DEFINES+=\ BITVECTOR_BIT_NUMBERING_MSB \ J9_PROJECT_SPECIFIC diff --git a/runtime/compiler/build/toolcfg/gnu/common.mk b/runtime/compiler/build/toolcfg/gnu/common.mk index c4de3e711b4..b294cef0141 100644 --- a/runtime/compiler/build/toolcfg/gnu/common.mk +++ b/runtime/compiler/build/toolcfg/gnu/common.mk @@ -1,4 +1,4 @@ -# Copyright (c) 2000, 2021 IBM Corp. and others +# Copyright (c) 2000, 2022 IBM Corp. 
and others # # This program and the accompanying materials are made available under # the terms of the Eclipse Public License 2.0 which accompanies this @@ -290,6 +290,11 @@ ifeq ($(HOST_ARCH),x) $(J9SRC)/compiler \ $(J9SRC)/compiler/x/runtime + ifneq ($(J9VM_OPT_MICROJIT),) + NASM_INCLUDES+=\ + $(J9SRC)/compiler/microjit/assembly + endif + ifeq ($(HOST_BITS),32) NASM_OBJ_FORMAT=-felf32 @@ -310,6 +315,11 @@ ifeq ($(HOST_ARCH),x) TR_HOST_64BIT \ TR_TARGET_64BIT + ifneq ($(J9VM_OPT_MICROJIT),) + NASM_DEFINES+=\ + J9VM_OPT_MICROJIT + endif + NASM_INCLUDES+=\ $(J9SRC)/compiler/x/amd64/runtime endif diff --git a/runtime/compiler/control/CompilationRuntime.hpp b/runtime/compiler/control/CompilationRuntime.hpp index 9b41bc2fb1e..3608b298ded 100644 --- a/runtime/compiler/control/CompilationRuntime.hpp +++ b/runtime/compiler/control/CompilationRuntime.hpp @@ -674,12 +674,50 @@ class CompilationInfo #if defined(J9VM_OPT_JITSERVER) TR_ASSERT_FATAL(!TR::CompilationInfo::getStream(), "not yet implemented for JITServer"); #endif /* defined(J9VM_OPT_JITSERVER) */ - value = (value << 1) | J9_STARTPC_NOT_TRANSLATED; - if (value < 0) - value = INT_MAX; - method->extra = reinterpret_cast(static_cast(value)); +#if defined(J9VM_OPT_MICROJIT) + TR::Options *options = TR::Options::getJITCmdLineOptions(); + /* Setting MicroJIT invocation count to 0 + causes some methods to not get compiled. + Hence we generally set it to a non-zero value. + 20 has been proven to be a good estimate for count value + for similar template-based JITs. + */ + if (options->_mjitEnabled && value) + { + return; + } + else +#endif + { + value = (value << 1) | J9_STARTPC_NOT_TRANSLATED; + if (value < 0) + value = INT_MAX; + method->extra = reinterpret_cast(static_cast(value)); + } } +#if defined(J9VM_OPT_MICROJIT) + static void setInitialMJITCountUnsynchronized(J9Method *method, int32_t mjitThreshold, int32_t trCount, J9JITConfig *jitConfig, J9VMThread *vmThread) + { + if (TR::Options::getJITCmdLineOptions()->_mjitEnabled) + { + intptr_t value; + if (trCount < mjitThreshold) + { + value = (intptr_t)((trCount << 1) | J9_STARTPC_NOT_TRANSLATED); + TR_J9VMBase *fe = TR_J9VMBase::get(jitConfig, vmThread); + uint8_t *extendedFlags = fe->fetchMethodExtendedFlagsPointer(method); + *extendedFlags = *extendedFlags | J9_MJIT_FAILED_COMPILE; + } + else + { + value = (intptr_t)((mjitThreshold << 1) | J9_STARTPC_NOT_TRANSLATED); + } + method->extra = reinterpret_cast(static_cast(value)); + } + } +#endif + static uint32_t getMethodBytecodeSize(const J9ROMMethod * romMethod); static uint32_t getMethodBytecodeSize(J9Method* method); diff --git a/runtime/compiler/control/CompilationThread.cpp b/runtime/compiler/control/CompilationThread.cpp index 26d8eebd70f..60f50a3f749 100644 --- a/runtime/compiler/control/CompilationThread.cpp +++ b/runtime/compiler/control/CompilationThread.cpp @@ -94,6 +94,15 @@ #include "env/J9SegmentCache.hpp" #include "env/SystemSegmentProvider.hpp" #include "env/DebugSegmentProvider.hpp" +#include "ilgen/J9ByteCodeIterator.hpp" +#include "ilgen/J9ByteCodeIteratorWithState.hpp" +#if defined(J9VM_OPT_MICROJIT) +#include "microjit/x/amd64/AMD64Codegen.hpp" +#include "microjit/x/amd64/AMD64CodegenGC.hpp" +#include "microjit/SideTables.hpp" +#include "microjit/utils.hpp" +#include "codegen/OMRLinkage_inlines.hpp" +#endif /* J9VM_OPT_MICROJIT */ #if defined(J9VM_OPT_JITSERVER) #include "control/JITClientCompilationThread.hpp" #include "control/JITServerCompilationThread.hpp" @@ -155,6 +164,24 @@ extern TR::OptionSet 
*findOptionSet(J9Method *, bool); #define DECOMPRESSION_FAILED -1 #define DECOMPRESSION_SUCCEEDED 0 +#if defined(J9VM_OPT_MICROJIT) +static bool +shouldCompileWithMicroJIT(J9Method *method, J9JITConfig *jitConfig, J9VMThread *vmThread) + { + // If MicroJIT is disabled, return early and avoid overhead + bool compileWithMicroJIT = TR::Options::getJITCmdLineOptions()->_mjitEnabled; + if (!compileWithMicroJIT) + return false; + + TR_J9VMBase *fe = TR_J9VMBase::get(jitConfig, vmThread); + UDATA extra = (UDATA)method->extra; + uint8_t *extendedFlags = fe->fetchMethodExtendedFlagsPointer(method); + return (J9_ARE_ALL_BITS_SET(extra, J9_STARTPC_NOT_TRANSLATED) // MicroJIT is not a target for recompilation + && !(J9_ROM_METHOD_FROM_RAM_METHOD(method)->modifiers & J9AccNative) // MicroJIT does not compile native methods + && !(*extendedFlags & J9_MJIT_FAILED_COMPILE)); // MicroJIT failed to compile this method already + } +#endif + #if defined(WINDOWS) void setThreadAffinity(unsigned _int64 handle, unsigned long mask) { @@ -2333,6 +2360,14 @@ bool TR::CompilationInfo::shouldRetryCompilation(TR_MethodToBeCompiled *entry, T entry->_optimizationPlan->setDisableGCR(); // GCR isn't needed tryCompilingAgain = true; break; +#if defined(J9VM_OPT_MICROJIT) + case mjitCompilationFailure: + /* MicroJIT generates this failure when it fails to compile. + The next time this method is queued for compilation, + it will be compiled by the regular JIT compiler, Testarossa. */ + tryCompilingAgain = true; + break; +#endif case compilationNullSubstituteCodeCache: case compilationCodeMemoryExhausted: case compilationCodeCacheError: @@ -9073,9 +9108,17 @@ TR::CompilationInfoPerThreadBase::wrappedCompile(J9PortLibrary *portLib, void * Trc_JIT_compileStart(vmThread, hotnessString, compiler->signature()); TR_ASSERT(compiler->getMethodHotness() != unknownHotness, "Trying to compile at unknown hotness level"); - - metaData = that->compile(vmThread, compiler, compilee, *vm, p->_optimizationPlan, scratchSegmentProvider); - +#if defined(J9VM_OPT_MICROJIT) + J9Method *method = that->_methodBeingCompiled->getMethodDetails().getMethod(); + if (shouldCompileWithMicroJIT(method, jitConfig, vmThread)) + { + metaData = that->mjit(vmThread, compiler, compilee, *vm, p->_optimizationPlan, scratchSegmentProvider, p->trMemory()); + } + else +#endif + { + metaData = that->compile(vmThread, compiler, compilee, *vm, p->_optimizationPlan, scratchSegmentProvider); + } } try @@ -10919,6 +10962,13 @@ TR::CompilationInfoPerThreadBase::processException( _methodBeingCompiled->_compErrCode = compilationHeapLimitExceeded; } /* Allocation Exceptions End */ +#if defined(J9VM_OPT_MICROJIT) + catch (const MJIT::MJITCompilationFailure &e) + { + shouldProcessExceptionCommonTasks = false; + _methodBeingCompiled->_compErrCode = mjitCompilationFailure; + } +#endif /* IL Gen Exceptions Start */ catch (const J9::AOTHasInvokeHandle &e) diff --git a/runtime/compiler/control/CompilationThread.hpp b/runtime/compiler/control/CompilationThread.hpp index c12ddf76f86..af6a17736e3 100644 --- a/runtime/compiler/control/CompilationThread.hpp +++ b/runtime/compiler/control/CompilationThread.hpp @@ -157,9 +157,26 @@ class CompilationInfoPerThreadBase TR_MethodMetaData *getMetadata() {return _metadata;} void setMetadata(TR_MethodMetaData *m) {_metadata = m;} void *compile(J9VMThread *context, TR_MethodToBeCompiled *entry, J9::J9SegmentProvider &scratchSegmentProvider); - TR_MethodMetaData *compile(J9VMThread *context, TR::Compilation *, - TR_ResolvedMethod *compilee, 
TR_J9VMBase &, TR_OptimizationPlan*, TR::SegmentAllocator const &scratchSegmentProvider); - TR_MethodMetaData *performAOTLoad(J9VMThread *context, TR::Compilation *, TR_ResolvedMethod *compilee, TR_J9VMBase *vm, J9Method *method); +#if defined(J9VM_OPT_MICROJIT) + TR_MethodMetaData *mjit(J9VMThread *context, + TR::Compilation *, + TR_ResolvedMethod *compilee, + TR_J9VMBase &, + TR_OptimizationPlan*, + TR::SegmentAllocator const &scratchSegmentProvider, + TR_Memory *trMemory); +#endif + TR_MethodMetaData *compile(J9VMThread *context, + TR::Compilation *, + TR_ResolvedMethod *compilee, + TR_J9VMBase &, + TR_OptimizationPlan*, + TR::SegmentAllocator const &scratchSegmentProvider); + TR_MethodMetaData *performAOTLoad(J9VMThread *context, + TR::Compilation *, + TR_ResolvedMethod *compilee, + TR_J9VMBase *vm, + J9Method *method); void preCompilationTasks(J9VMThread * vmThread, TR_MethodToBeCompiled *entry, diff --git a/runtime/compiler/control/HookedByTheJit.cpp b/runtime/compiler/control/HookedByTheJit.cpp index 34121579404..3a96a51c75a 100644 --- a/runtime/compiler/control/HookedByTheJit.cpp +++ b/runtime/compiler/control/HookedByTheJit.cpp @@ -610,7 +610,16 @@ static void jitHookInitializeSendTarget(J9HookInterface * * hook, UDATA eventNum TR_VerboseLog::writeLineLocked(TR_Vlog_PERF, "Setting count=%d for %s", count, buffer); } - TR::CompilationInfo::setInitialInvocationCountUnsynchronized(method,count); +#if defined(J9VM_OPT_MICROJIT) + if (optionsJIT->_mjitEnabled) + { + int32_t mjitCount = optionsJIT->_mjitInitialCount; + if (mjitCount && !(TR::Options::sharedClassCache() && jitConfig->javaVM->sharedClassConfig->existsCachedCodeForROMMethod(vmThread, romMethod))) + TR::CompilationInfo::setInitialMJITCountUnsynchronized(method, mjitCount, count, jitConfig, vmThread); + } +#endif + + TR::CompilationInfo::setInitialInvocationCountUnsynchronized(method, count); if (TR::Options::getJITCmdLineOptions()->getOption(TR_DumpInitialMethodNamesAndCounts) || TR::Options::getAOTCmdLineOptions()->getOption(TR_DumpInitialMethodNamesAndCounts)) { diff --git a/runtime/compiler/control/J9Options.cpp b/runtime/compiler/control/J9Options.cpp index 1e3b0fdfb81..b9173175374 100644 --- a/runtime/compiler/control/J9Options.cpp +++ b/runtime/compiler/control/J9Options.cpp @@ -278,6 +278,11 @@ int32_t J9::Options::_dltPostponeThreshold = 2; int32_t J9::Options::_expensiveCompWeight = TR::CompilationInfo::JSR292_WEIGHT; int32_t J9::Options::_jProfilingEnablementSampleThreshold = 10000; +#if defined(J9VM_OPT_MICROJIT) +int32_t J9::Options::_mjitEnabled = 0; +int32_t J9::Options::_mjitInitialCount = 20; +#endif + bool J9::Options::_aggressiveLockReservation = false; //************************************************************************ @@ -919,6 +924,12 @@ TR::OptionTable OMR::Options::_feOptions[] = { TR::Options::setStaticNumeric, (intptr_t)&TR::Options::_minSamplingPeriod, 0, "P%d", NOT_IN_SUBSET}, {"minSuperclassArraySize=", "I\t set the size of the minimum superclass array size", TR::Options::setStaticNumeric, (intptr_t)&TR::Options::_minimumSuperclassArraySize, 0, "F%d", NOT_IN_SUBSET}, +#if defined(J9VM_OPT_MICROJIT) + {"mjitCount=", "C\tnumber of invocations before MicroJIT compiles methods without loops", + TR::Options::setStaticNumeric, (intptr_t)&TR::Options::_mjitInitialCount, 0, "F%d"}, + {"mjitEnabled=", "C\tenable MicroJIT (set to 1 to enable, default=0)", + TR::Options::setStaticNumeric, (intptr_t)&TR::Options::_mjitEnabled, 0, "F%d"}, +#endif {"noregmap", 0, 
RESET_JITCONFIG_RUNTIME_FLAG(J9JIT_CG_REGISTER_MAPS) }, {"numCodeCachesOnStartup=", "R\tnumber of code caches to create at startup", TR::Options::setStaticNumeric, (intptr_t)&TR::Options::_numCodeCachesToCreateAtStartup, 0, "F%d", NOT_IN_SUBSET}, diff --git a/runtime/compiler/control/J9Options.hpp b/runtime/compiler/control/J9Options.hpp index b7c651470b7..a15592dd35e 100644 --- a/runtime/compiler/control/J9Options.hpp +++ b/runtime/compiler/control/J9Options.hpp @@ -333,6 +333,15 @@ class OMR_EXTENSIBLE Options : public OMR::OptionsConnector static int32_t _expensiveCompWeight; // weight of a comp request to be considered expensive static int32_t _jProfilingEnablementSampleThreshold; +#if defined(J9VM_OPT_MICROJIT) + static int32_t getMJITEnabled() { return _mjitEnabled; } + static int32_t _mjitEnabled; + + + static int32_t getMJITInitialCount() { return _mjitInitialCount; } + static int32_t _mjitInitialCount; +#endif + static bool _aggressiveLockReservation; static void printPID(); diff --git a/runtime/compiler/control/J9Recompilation.cpp b/runtime/compiler/control/J9Recompilation.cpp index 0e18e0c0c4c..781d632f019 100644 --- a/runtime/compiler/control/J9Recompilation.cpp +++ b/runtime/compiler/control/J9Recompilation.cpp @@ -497,7 +497,7 @@ J9::Recompilation::getExistingMethodInfo(TR_ResolvedMethod *method) /** * This method can extract a value profiler from the current list of * recompilation profilers. - * + * * \return The first TR_ValueProfiler in the current list of profilers, NULL if there are none. */ TR_ValueProfiler * @@ -739,7 +739,7 @@ TR_PersistentMethodInfo::setForSharedInfo(TR_PersistentProfileInfo** ptr, TR_Per // Before it can be accessed, inc ref count on new info if (newInfo) TR_PersistentProfileInfo::incRefCount(newInfo); - + // Update ptr as if it was unlocked // Doesn't matter what the old info was, as long as it was unlocked do { diff --git a/runtime/compiler/control/RecompilationInfo.hpp b/runtime/compiler/control/RecompilationInfo.hpp index b9f2a44fe3f..6d041008156 100644 --- a/runtime/compiler/control/RecompilationInfo.hpp +++ b/runtime/compiler/control/RecompilationInfo.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2021 IBM Corp. and others + * Copyright (c) 2000, 2022 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -44,6 +44,9 @@ class TR_FrontEnd; class TR_OpaqueMethodBlock; class TR_OptimizationPlan; class TR_ResolvedMethod; +#if defined(J9VM_OPT_MICROJIT) +namespace MJIT { class CodeGenerator; } +#endif namespace TR { class Instruction; } namespace TR { class SymbolReference; } @@ -321,6 +324,9 @@ class TR_PersistentJittedBodyInfo friend class TR_EmilyPersistentJittedBodyInfo; friend class ::OMR::Options; friend class J9::Options; +#if defined(J9VM_OPT_MICROJIT) + friend class MJIT::CodeGenerator; +#endif #if defined(TR_HOST_X86) || defined(TR_HOST_POWER) || defined(TR_HOST_S390) || defined(TR_HOST_ARM) || defined(TR_HOST_ARM64) friend void fixPersistentMethodInfo(void *table, bool isJITClientAOTLoad); @@ -332,6 +338,10 @@ class TR_PersistentJittedBodyInfo static TR_PersistentJittedBodyInfo *get(void *startPC); bool getHasLoops() { return _flags.testAny(HasLoops); } +#if defined(J9VM_OPT_MICROJIT) + bool isMJITCompiledMethod() { return _flags.testAny(IsMJITCompiled); } + void setIsMJITCompiledMethod(bool b){ _flags.set(IsMJITCompiled, b); } +#endif /* defined(J9VM_OPT_MICROJIT) */ bool getUsesPreexistence() { return _flags.testAny(UsesPreexistence); } bool getDisableSampling() { return _flags.testAny(DisableSampling); } void setDisableSampling(bool b) { _flags.set(DisableSampling, b); } @@ -410,7 +420,9 @@ class TR_PersistentJittedBodyInfo enum { HasLoops = 0x0001, - //HasManyIterationsLoops = 0x0002, // Available +#if defined(J9VM_OPT_MICROJIT) + IsMJITCompiled = 0x0002, +#endif /* defined(J9VM_OPT_MICROJIT) */ UsesPreexistence = 0x0004, DisableSampling = 0x0008, // This flag disables sampling of this method even though its recompilable IsProfilingBody = 0x0010, diff --git a/runtime/compiler/control/rossa.cpp b/runtime/compiler/control/rossa.cpp index c6be8c1c9e5..ecfcd502843 100644 --- a/runtime/compiler/control/rossa.cpp +++ b/runtime/compiler/control/rossa.cpp @@ -204,13 +204,16 @@ char *compilationErrorNames[]={ "compilationAotPatchedCPConstant", // 45 "compilationAotHasInvokeSpecialInterface", // 46 "compilationRelocationFailure", // 47 +#if defined(J9VM_OPT_MICROJIT) + "mjitCompilationFailure", // 48 +#endif /* defined(J9VM_OPT_MICROJIT) */ #if defined(J9VM_OPT_JITSERVER) - "compilationStreamFailure", // compilationFirstJITServerFailure = 48 - "compilationStreamLostMessage", // compilationFirstJITServerFailure + 1 = 49 - "compilationStreamMessageTypeMismatch", // compilationFirstJITServerFailure + 2 = 50 - "compilationStreamVersionIncompatible", // compilationFirstJITServerFailure + 3 = 51 - "compilationStreamInterrupted", // compilationFirstJITServerFailure + 4 = 52 - "aotCacheDeserializationFailure", // compilationFirstJITServerFailure + 5 = 53 + "compilationStreamFailure", // compilationFirstJITServerFailure = 48 or 49 + "compilationStreamLostMessage", // compilationFirstJITServerFailure + 1 = 49 or 50 + "compilationStreamMessageTypeMismatch", // compilationFirstJITServerFailure + 2 = 50 or 51 + "compilationStreamVersionIncompatible", // compilationFirstJITServerFailure + 3 = 51 or 52 + "compilationStreamInterrupted", // compilationFirstJITServerFailure + 4 = 52 or 53 + "aotCacheDeserializationFailure", // compilationFirstJITServerFailure + 5 = 53 or 54 #endif /* defined(J9VM_OPT_JITSERVER) */ "compilationMaxError" }; diff --git a/runtime/compiler/control/rossa.h b/runtime/compiler/control/rossa.h index 03553736a0c..2b054a680f6 100644 
--- a/runtime/compiler/control/rossa.h +++ b/runtime/compiler/control/rossa.h @@ -74,14 +74,17 @@ typedef enum { compilationAotPatchedCPConstant = 45, compilationAotHasInvokeSpecialInterface = 46, compilationRelocationFailure = 47, +#if defined(J9VM_OPT_MICROJIT) + mjitCompilationFailure = 48, +#endif /* defined(J9VM_OPT_MICROJIT) */ #if defined(J9VM_OPT_JITSERVER) compilationFirstJITServerFailure, - compilationStreamFailure = compilationFirstJITServerFailure, // 48 - compilationStreamLostMessage = compilationFirstJITServerFailure + 1, // 49 - compilationStreamMessageTypeMismatch = compilationFirstJITServerFailure + 2, // 50 - compilationStreamVersionIncompatible = compilationFirstJITServerFailure + 3, // 51 - compilationStreamInterrupted = compilationFirstJITServerFailure + 4, // 52 - aotCacheDeserializationFailure = compilationFirstJITServerFailure + 5, // 53 + compilationStreamFailure = compilationFirstJITServerFailure, // 48 or 49 + compilationStreamLostMessage = compilationFirstJITServerFailure + 1, // 49 or 50 + compilationStreamMessageTypeMismatch = compilationFirstJITServerFailure + 2, // 50 or 51 + compilationStreamVersionIncompatible = compilationFirstJITServerFailure + 3, // 51 or 52 + compilationStreamInterrupted = compilationFirstJITServerFailure + 4, // 52 or 53 + aotCacheDeserializationFailure = compilationFirstJITServerFailure + 5, // 53 or 54 #endif /* defined(J9VM_OPT_JITSERVER) */ /* must be the last one */ diff --git a/runtime/compiler/env/VMJ9.cpp b/runtime/compiler/env/VMJ9.cpp index cb1eb5d1721..bb9f75d9196 100644 --- a/runtime/compiler/env/VMJ9.cpp +++ b/runtime/compiler/env/VMJ9.cpp @@ -6680,6 +6680,15 @@ TR_J9VMBase::isGetImplInliningSupported() return jvm->memoryManagerFunctions->j9gc_modron_isFeatureSupported(jvm, j9gc_modron_feature_inline_reference_get) != 0; } +#if defined(J9VM_OPT_MICROJIT) +bool +TR_J9VMBase::isMJITExtendedFlagsMethod(J9Method *method) + { + uint8_t *extendedFlags = fetchMethodExtendedFlagsPointer(method); + return ((*extendedFlags & J9_MJIT_FAILED_COMPILE) != J9_MJIT_FAILED_COMPILE); + } +#endif + /** \brief * Get the raw modifier from the class pointer. 
* diff --git a/runtime/compiler/env/VMJ9.h b/runtime/compiler/env/VMJ9.h index 60d869cbfd9..44de88a2cb6 100644 --- a/runtime/compiler/env/VMJ9.h +++ b/runtime/compiler/env/VMJ9.h @@ -252,6 +252,9 @@ class TR_J9VMBase : public TR_FrontEnd virtual bool canAllowDifferingNumberOrTypesOfArgsAndParmsInInliner(); ///// virtual bool isGetImplInliningSupported(); +#if defined(J9VM_OPT_MICROJIT) + virtual bool isMJITExtendedFlagsMethod(J9Method *); +#endif virtual uintptr_t getClassDepthAndFlagsValue(TR_OpaqueClassBlock * classPointer); virtual uintptr_t getClassFlagsValue(TR_OpaqueClassBlock * classPointer); @@ -1542,6 +1545,9 @@ struct TR_PCMap typedef J9JITExceptionTable TR_MethodMetaData; TR_MethodMetaData * createMethodMetaData(TR_J9VMBase &, TR_ResolvedMethod *, TR::Compilation *); +#if defined(J9VM_OPT_MICROJIT) +TR_MethodMetaData * createMJITMethodMetaData(TR_J9VMBase &, TR_ResolvedMethod *, TR::Compilation *); +#endif extern J9JITConfig * jitConfig; diff --git a/runtime/compiler/ilgen/J9ByteCodeIterator.cpp b/runtime/compiler/ilgen/J9ByteCodeIterator.cpp index 76e5dd027ec..f99b176cd70 100644 --- a/runtime/compiler/ilgen/J9ByteCodeIterator.cpp +++ b/runtime/compiler/ilgen/J9ByteCodeIterator.cpp @@ -310,6 +310,30 @@ TR_J9ByteCodeIterator::printByteCode() } } +void +TR_J9ByteCodeIterator::printByteCodes() + { + printByteCodePrologue(); + for (TR_J9ByteCode bc = first(); bc != J9BCunknown; bc = next()) + { + printByteCode(); + } + printByteCodeEpilogue(); + } + +const char * +TR_J9ByteCodeIterator::currentMnemonic() + { + uint8_t opcode = nextByte(0); + return fe()->getByteCodeName(opcode); + } + +uint8_t +TR_J9ByteCodeIterator::currentOpcode() + { + return nextByte(0); + } + const TR_J9ByteCode TR_J9ByteCodeIterator::_opCodeToByteCodeEnum[] = { /* 0 */ J9BCnop, diff --git a/runtime/compiler/ilgen/J9ByteCodeIterator.hpp b/runtime/compiler/ilgen/J9ByteCodeIterator.hpp index 9607d0b7928..07f0372e254 100644 --- a/runtime/compiler/ilgen/J9ByteCodeIterator.hpp +++ b/runtime/compiler/ilgen/J9ByteCodeIterator.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2020 IBM Corp. and others + * Copyright (c) 2000, 2022 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -158,6 +158,9 @@ class TR_J9ByteCodeIterator : public TR_ByteCodeIteratorgetSuccessors().size() == 1) || - !temp->asBlock()->getEntry() || - !((temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch() - && !temp->asBlock()->getLastRealTreeTop()->getNode()->getByteCodeInfo().doNotProfile()) || - temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup || - temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table)) + bool loopVar = false; + if (false == mjit) // TR Compilation { - if (_seenNodes->isSet(temp->getNumber())) + loopVar = (temp->getSuccessors().size() == 1) || + !temp->asBlock()->getEntry() || + !((temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch() + && !temp->asBlock()->getLastRealTreeTop()->getNode()->getByteCodeInfo().doNotProfile()) || + temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup || + temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table); + } + else // MicroJIT Compilation + { + loopVar = continueLoop(temp); + } + while (loopVar) + { + if (seenNodes->isSet(temp->getNumber())) break; upStack.add(temp); - _seenNodes->set(temp->getNumber()); + seenNodes->set(temp->getNumber()); if (!backEdgeExists && !temp->getPredecessors().empty() && !(temp->getPredecessors().size() == 1)) { @@ -900,9 +911,7 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() if (!temp->getSuccessors().empty()) { if ((temp->getSuccessors().size() == 2) && temp->asBlock() && temp->asBlock()->getNextBlock()) - { temp = temp->asBlock()->getNextBlock(); - } else temp = temp->getSuccessors().front()->getTo(); } @@ -911,58 +920,92 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() } if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Propagation start block_%d\n", temp->getNumber()); + dumpOptDetails(comp(), "Propagation start block_%d\n", temp->getNumber()); if (temp->asBlock()->getEntry()) inlinedSiteIndex = temp->asBlock()->getEntry()->getNode()->getInlinedSiteIndex(); - if ((temp->getSuccessors().size() == 2) && - temp->asBlock()->getEntry() && - temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch() && - !temp->asBlock()->getLastRealTreeTop()->getNode()->getByteCodeInfo().doNotProfile()) - { - getInterpreterProfilerBranchCountersOnDoubleton(temp, &taken, ¬taken); - startFrequency = taken + nottaken; - self()->setEdgeFrequenciesOnNode( temp, taken, nottaken, comp()); - } - else if (temp->asBlock()->getEntry() && - (temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup || - temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table)) + if (false == mjit) { - startFrequency = _externalProfiler->getSumSwitchCount(temp->asBlock()->getLastRealTreeTop()->getNode(), comp()); - if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Switch with total frequency of %d\n", startFrequency); - setSwitchEdgeFrequenciesOnNode(temp, comp()); + if ((temp->getSuccessors().size() == 2) && + temp->asBlock()->getEntry() && + temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch() && + !temp->asBlock()->getLastRealTreeTop()->getNode()->getByteCodeInfo().doNotProfile()) + { + getInterpreterProfilerBranchCountersOnDoubleton(temp, &taken, ¬taken, mjit); + startFrequency = taken + nottaken; + 
self()->setEdgeFrequenciesOnNode(temp, taken, nottaken, comp()); + } + else if (temp->asBlock()->getEntry() && + (temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup || + temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table)) + { + startFrequency = _externalProfiler->getSumSwitchCount(temp->asBlock()->getLastRealTreeTop()->getNode(), comp()); + if (comp()->getOption(TR_TraceBFGeneration)) + dumpOptDetails(comp(), "Switch with total frequency of %d\n", startFrequency); + setSwitchEdgeFrequenciesOnNode(temp, comp()); + } + else + { + if (_calledFrequency > 0) + startFrequency = _calledFrequency; + else if (initialCallScanFreq > 0) + startFrequency = initialCallScanFreq; + else + startFrequency = AVG_FREQ; + + if ((temp->getSuccessors().size() == 2) && (startFrequency > 0) && temp->asBlock()->getEntry() && temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) + self()->setEdgeFrequenciesOnNode( temp, 0, startFrequency, comp()); + else + self()->setUniformEdgeFrequenciesOnNode(temp, startFrequency, false, comp()); + } } else { - if (_calledFrequency > 0) - startFrequency = _calledFrequency; - else if (initialCallScanFreq > 0) - startFrequency = initialCallScanFreq; - else + if ((temp->getSuccessors().size() == 2) && + temp->asBlock()->getEntry() && + isBranch(getLastRealBytecodeOfBlock(temp))) { - startFrequency = AVG_FREQ; + getInterpreterProfilerBranchCountersOnDoubleton(temp, &taken, ¬taken, mjit); + startFrequency = taken + nottaken; + self()->setEdgeFrequenciesOnNode(temp, taken, nottaken, comp()); + } + else if (temp->asBlock()->getEntry() && + isTableOp(getBytecodeFromIndex(temp->asBlock()->getExit()->getNode()->getLocalIndex()))) + { + startFrequency = _externalProfiler->getSumSwitchCount(temp->asBlock()->getExit()->getNode(), comp()); + if (comp()->getOption(TR_TraceBFGeneration)) + dumpOptDetails(comp(), "Switch with total frequency of %d\n", startFrequency); + setSwitchEdgeFrequenciesOnNode(temp, comp()); } - - if ((temp->getSuccessors().size() == 2) && (startFrequency > 0) && temp->asBlock()->getEntry() && temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) - self()->setEdgeFrequenciesOnNode( temp, 0, startFrequency, comp()); else - self()->setUniformEdgeFrequenciesOnNode(temp, startFrequency, false, comp()); + { + if (_calledFrequency > 0) + startFrequency = _calledFrequency; + else if (initialCallScanFreq > 0) + startFrequency = initialCallScanFreq; + else + startFrequency = AVG_FREQ; + + if ((temp->getSuccessors().size() == 2) && (startFrequency > 0) && temp->asBlock()->getEntry() && isBranch(getLastRealBytecodeOfBlock(temp))) + self()->setEdgeFrequenciesOnNode(temp, 0, startFrequency, comp()); + else + self()->setUniformEdgeFrequenciesOnNode(temp, startFrequency, false, comp()); + } } - setBlockFrequency (temp, startFrequency); + setBlockFrequency(temp, startFrequency); _initialBlockFrequency = startFrequency; if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Set frequency of %d on block_%d\n", temp->asBlock()->getFrequency(), temp->getNumber()); + dumpOptDetails(comp(), "Set frequency of %d on block_%d\n", temp->asBlock()->getFrequency(), temp->getNumber()); start = temp; // Walk backwards to the start and propagate this frequency if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Propagating start frequency backwards...\n"); + dumpOptDetails(comp(), "Propagating start frequency backwards...\n"); ListIterator upit(&upStack); for (temp = 
upit.getFirst(); temp; temp = upit.getNext()) @@ -970,18 +1013,18 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() if (!temp->asBlock()->getEntry()) continue; if ((temp->getSuccessors().size() == 2) && (startFrequency > 0) && temp->asBlock()->getEntry() && temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) - self()->setEdgeFrequenciesOnNode( temp, 0, startFrequency, comp()); + self()->setEdgeFrequenciesOnNode(temp, 0, startFrequency, comp()); else - self()->setUniformEdgeFrequenciesOnNode( temp, startFrequency, false, comp()); - setBlockFrequency (temp, startFrequency); - _seenNodes->set(temp->getNumber()); + self()->setUniformEdgeFrequenciesOnNode(temp, startFrequency, false, comp()); + setBlockFrequency(temp, startFrequency); + seenNodes->set(temp->getNumber()); if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Set frequency of %d on block_%d\n", temp->asBlock()->getFrequency(), temp->getNumber()); + dumpOptDetails(comp(), "Set frequency of %d on block_%d\n", temp->asBlock()->getFrequency(), temp->getNumber()); } if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Propagating block frequency forward...\n"); + dumpOptDetails(comp(), "Propagating block frequency forward...\n"); // Walk reverse post-order // we start at the first if or switch statement @@ -995,10 +1038,10 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() if (comp()->getOption(TR_TraceBFGeneration)) traceMsg(comp(), "Considering block_%d\n", node->getNumber()); - if (_seenNodes->isSet(node->getNumber())) + if (seenNodes->isSet(node->getNumber())) continue; - _seenNodes->set(node->getNumber()); + seenNodes->set(node->getNumber()); if (!node->asBlock()->getEntry()) continue; @@ -1008,11 +1051,11 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() if (node->asBlock()->isCold()) { if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Analyzing COLD block_%d\n", node->getNumber()); + dumpOptDetails(comp(), "Analyzing COLD block_%d\n", node->getNumber()); //node->asBlock()->setFrequency(0); int32_t freq = node->getFrequency(); if ((node->getSuccessors().size() == 2) && (freq > 0) && node->asBlock()->getEntry() && node->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) - self()->setEdgeFrequenciesOnNode( node, 0, freq, comp()); + self()->setEdgeFrequenciesOnNode(node, 0, freq, comp()); else self()->setUniformEdgeFrequenciesOnNode(node, freq, false, comp()); setBlockFrequency (node, freq); @@ -1031,8 +1074,8 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() if (!isVirtualGuard(node->asBlock()->getLastRealTreeTop()->getNode()) && !node->asBlock()->getLastRealTreeTop()->getNode()->getByteCodeInfo().doNotProfile()) { - _seenNodesInCycle->empty(); - getInterpreterProfilerBranchCountersOnDoubleton(node, &taken, ¬taken); + seenNodesInCycle->empty(); + getInterpreterProfilerBranchCountersOnDoubleton(node, &taken, ¬taken, mjit); if ((taken <= 0) && (nottaken <= 0)) { @@ -1040,11 +1083,11 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() nottaken = LOW_FREQ; } - self()->setEdgeFrequenciesOnNode( node, taken, nottaken, comp()); - setBlockFrequency (node, taken + nottaken); + self()->setEdgeFrequenciesOnNode(node, taken, nottaken, comp()); + setBlockFrequency(node, taken + nottaken); if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"If on node %p is not guard I'm using the taken and nottaken counts for producing block frequency\n", node->asBlock()->getLastRealTreeTop()->getNode()); 
+ dumpOptDetails(comp(), "If on node %p is not guard I'm using the taken and nottaken counts for producing block frequency\n", node->asBlock()->getLastRealTreeTop()->getNode()); } else { @@ -1071,7 +1114,7 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() { TR::CFGNode *pred = edge->getFrom(); - if (pred->getFrequency()< 0) + if (pred->getFrequency() < 0) { predNotSet = pred; break; @@ -1079,42 +1122,42 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() } if (predNotSet && - !_seenNodesInCycle->get(node->getNumber())) + !seenNodesInCycle->get(node->getNumber())) { stack.add(predNotSet); - _seenNodesInCycle->set(node->getNumber()); - _seenNodes->reset(node->getNumber()); + seenNodesInCycle->set(node->getNumber()); + seenNodes->reset(node->getNumber()); continue; } if (!predNotSet) - _seenNodesInCycle->empty(); + seenNodesInCycle->empty(); int32_t sumFreq = summarizeFrequencyFromPredecessors(node, self()); if (sumFreq <= 0) sumFreq = AVG_FREQ; - self()->setEdgeFrequenciesOnNode( node, 0, sumFreq, comp()); - setBlockFrequency (node, sumFreq); + self()->setEdgeFrequenciesOnNode(node, 0, sumFreq, comp()); + setBlockFrequency(node, sumFreq); if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"If on node %p is guard I'm using the predecessor frequency sum\n", node->asBlock()->getLastRealTreeTop()->getNode()); + dumpOptDetails(comp(), "If on node %p is guard I'm using the predecessor frequency sum\n", node->asBlock()->getLastRealTreeTop()->getNode()); } if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Set frequency of %d on block_%d\n", node->asBlock()->getFrequency(), node->getNumber()); + dumpOptDetails(comp(), "Set frequency of %d on block_%d\n", node->asBlock()->getFrequency(), node->getNumber()); } else if (node->asBlock()->getEntry() && (node->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup || node->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table)) { - _seenNodesInCycle->empty(); + seenNodesInCycle->empty(); int32_t sumFreq = _externalProfiler->getSumSwitchCount(node->asBlock()->getLastRealTreeTop()->getNode(), comp()); setSwitchEdgeFrequenciesOnNode(node, comp()); - setBlockFrequency (node, sumFreq); + setBlockFrequency(node, sumFreq); if (comp()->getOption(TR_TraceBFGeneration)) { - dumpOptDetails(comp(),"Found a Switch statement at exit of block_%d\n", node->getNumber()); - dumpOptDetails(comp(),"Set frequency of %d on block_%d\n", node->asBlock()->getFrequency(), node->getNumber()); + dumpOptDetails(comp(), "Found a Switch statement at exit of block_%d\n", node->getNumber()); + dumpOptDetails(comp(), "Set frequency of %d on block_%d\n", node->asBlock()->getFrequency(), node->getNumber()); } } else @@ -1126,43 +1169,43 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() { TR::CFGNode *pred = edge->getFrom(); - if (pred->getFrequency()< 0) + if (pred->getFrequency() < 0) { predNotSet = pred; break; } - int32_t edgeFreq = edge->getFrequency(); - sumFreq += edgeFreq; - } + int32_t edgeFreq = edge->getFrequency(); + sumFreq += edgeFreq; + } if (predNotSet && - !_seenNodesInCycle->get(node->getNumber())) + !seenNodesInCycle->get(node->getNumber())) { - _seenNodesInCycle->set(node->getNumber()); - _seenNodes->reset(node->getNumber()); + seenNodesInCycle->set(node->getNumber()); + seenNodes->reset(node->getNumber()); stack.add(predNotSet); continue; } else { if (!predNotSet) - _seenNodesInCycle->empty(); + seenNodesInCycle->empty(); if (comp()->getOption(TR_TraceBFGeneration)) 
traceMsg(comp(), "2Setting block and uniform freqs\n"); if ((node->getSuccessors().size() == 2) && (sumFreq > 0) && node->asBlock()->getEntry() && node->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch()) - self()->setEdgeFrequenciesOnNode( node, 0, sumFreq, comp()); + self()->setEdgeFrequenciesOnNode(node, 0, sumFreq, comp()); else - self()->setUniformEdgeFrequenciesOnNode( node, sumFreq, false, comp()); - setBlockFrequency (node, sumFreq); + self()->setUniformEdgeFrequenciesOnNode(node, sumFreq, false, comp()); + setBlockFrequency(node, sumFreq); } if (comp()->getOption(TR_TraceBFGeneration)) { - dumpOptDetails(comp(),"Not an if (or unknown if) at exit of block %d (isSingleton=%d)\n", node->getNumber(), (node->getSuccessors().size() == 1)); - dumpOptDetails(comp(),"Set frequency of %d on block %d\n", node->asBlock()->getFrequency(), node->getNumber()); + dumpOptDetails(comp(), "Not an if (or unknown if) at exit of block %d (isSingleton=%d)\n", node->getNumber(), (node->getSuccessors().size() == 1)); + dumpOptDetails(comp(), "Set frequency of %d on block %d\n", node->asBlock()->getFrequency(), node->getNumber()); } } } @@ -1173,7 +1216,7 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() { TR::CFGNode *succ = edge->getTo(); - if (!_seenNodes->isSet(succ->getNumber())) + if (!seenNodes->isSet(succ->getNumber())) stack.add(succ); else { @@ -1183,7 +1226,7 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() (succ->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup || succ->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table)))) { - setBlockFrequency ( succ, edge->getFrequency(), true); + setBlockFrequency(succ, edge->getFrequency(), true); // addup this edge to the frequency of the blocks following it // propagate downward until you reach a block that doesn't end in @@ -1191,22 +1234,22 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() if (succ->getSuccessors().size() == 1) { TR::CFGNode *tempNode = succ->getSuccessors().front()->getTo(); - TR_BitVector *_seenGotoNodes = new (trStackMemory()) TR_BitVector(numBlocks, trMemory(), stackAlloc, notGrowable); - _seenGotoNodes->set(succ->getNumber()); + TR_BitVector *seenGotoNodes = new (trStackMemory()) TR_BitVector(numBlocks, trMemory(), stackAlloc, notGrowable); + seenGotoNodes->set(succ->getNumber()); while ((tempNode->getSuccessors().size() == 1) && (tempNode != succ) && (tempNode != getEnd()) && - !_seenGotoNodes->isSet(tempNode->getNumber())) + !seenGotoNodes->isSet(tempNode->getNumber())) { TR::CFGNode *nextTempNode = tempNode->getSuccessors().front()->getTo(); if (comp()->getOption(TR_TraceBFGeneration)) traceMsg(comp(), "3Setting block and uniform freqs\n"); - self()->setUniformEdgeFrequenciesOnNode( tempNode, edge->getFrequency(), true, comp()); - setBlockFrequency (tempNode, edge->getFrequency(), true); + self()->setUniformEdgeFrequenciesOnNode(tempNode, edge->getFrequency(), true, comp()); + setBlockFrequency(tempNode, edge->getFrequency(), true); - _seenGotoNodes->set(tempNode->getNumber()); + seenGotoNodes->set(tempNode->getNumber()); tempNode = nextTempNode; } } @@ -1240,9 +1283,9 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler() for (node = getFirstNode(); node; node=node->getNext()) { if ((node == getEnd()) || (node == start)) - node->asBlock()->setFrequency(0); + node->asBlock()->setFrequency(0); - if (_seenNodes->isSet(node->getNumber())) + if (seenNodes->isSet(node->getNumber())) continue; if (node->asBlock()->getEntry() && @@ 
-1269,7 +1312,7 @@ J9::CFG::setBlockFrequenciesBasedOnInterpreterProfiler()
 void
 J9::CFG::computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *comp)
    {
-   TR_ExternalProfiler* profiler = comp->fej9()->hasIProfilerBlockFrequencyInfo(*comp);
+   TR_ExternalProfiler *profiler = comp->fej9()->hasIProfilerBlockFrequencyInfo(*comp);
    if (!profiler)
       {
      _initialBlockFrequency = AVG_FREQ;
@@ -1281,7 +1324,7 @@ J9::CFG::computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *co
    TR::StackMemoryRegion stackMemoryRegion(*trMemory());
    int32_t numBlocks = getNextNodeNumber();
-   TR_BitVector *_seenNodes = new (trStackMemory()) TR_BitVector(numBlocks, trMemory(), stackAlloc, notGrowable);
+   TR_BitVector *seenNodes = new (trStackMemory()) TR_BitVector(numBlocks, trMemory(), stackAlloc, notGrowable);
    _frequencySet = new (trHeapMemory()) TR_BitVector(numBlocks, trMemory(), heapAlloc, notGrowable);
    int32_t startFrequency = AVG_FREQ;
    int32_t taken = AVG_FREQ;
@@ -1303,10 +1346,10 @@ J9::CFG::computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *co
           temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::lookup ||
           temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCodeValue() == TR::table))
          {
-         if (_seenNodes->isSet(temp->getNumber()))
+         if (seenNodes->isSet(temp->getNumber()))
             break;
-         _seenNodes->set(temp->getNumber());
+         seenNodes->set(temp->getNumber());
          if (!temp->getPredecessors().empty() && !(temp->getPredecessors().size() == 1))
             backEdgeExists = true;
@@ -1323,9 +1366,7 @@ J9::CFG::computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *co
       if (!temp->getSuccessors().empty())
          {
          if ((temp->getSuccessors().size() == 2) && temp->asBlock() && temp->asBlock()->getNextBlock())
-            {
             temp = temp->asBlock()->getNextBlock();
-            }
          else
             temp = temp->getSuccessors().front()->getTo();
          }
@@ -1341,7 +1382,7 @@ J9::CFG::computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *co
          temp->asBlock()->getLastRealTreeTop()->getNode()->getOpCode().isBranch() &&
          !temp->asBlock()->getLastRealTreeTop()->getNode()->getByteCodeInfo().doNotProfile())
          {
-         getInterpreterProfilerBranchCountersOnDoubleton(temp, &taken, &nottaken);
+         getInterpreterProfilerBranchCountersOnDoubleton(temp, &taken, &nottaken, false);
          if ((taken <= 0) && (nottaken <= 0))
             {
             taken = LOW_FREQ;
@@ -1362,9 +1403,7 @@ J9::CFG::computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *co
    else if (initialCallScanFreq > 0)
       startFrequency = initialCallScanFreq;
    else
-      {
      startFrequency = AVG_FREQ;
-      }
    }
    if (startFrequency <= 0)
@@ -1405,9 +1444,13 @@ getParentCallCount(TR::CFG *cfg, TR::Node *node)
 void
-J9::CFG::getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, int32_t *taken, int32_t *nottaken)
+J9::CFG::getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, int32_t *taken, int32_t *nottaken, bool mjitFlag)
    {
-   TR::Node *node = cfgNode->asBlock()->getLastRealTreeTop()->getNode();
+   TR::Node *node;
+   if (false == mjitFlag)
+      node = cfgNode->asBlock()->getLastRealTreeTop()->getNode();
+   else // We don't have Real Tree Tops in MicroJIT, so we need to work around that
+      node = cfgNode->asBlock()->getExit()->getNode();
    if (this != comp()->getFlowGraph())
       {
@@ -1422,7 +1465,7 @@ J9::CFG::getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, i
    if (*taken || *nottaken)
       {
      if (comp()->getOption(TR_TraceBFGeneration))
-        dumpOptDetails(comp(),"If on node %p has branch counts: taken=%d, not taken=%d\n", node, *taken, *nottaken);
+
dumpOptDetails(comp(), "If on node %p has branch counts: taken=%d, not taken=%d\n", node, *taken, *nottaken); } else if (isVirtualGuard(node)) { @@ -1434,7 +1477,7 @@ J9::CFG::getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, i *nottaken = sumFreq; if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"Guard on node %p has default branch counts: taken=%d, not taken=%d\n", node, *taken, *nottaken); + dumpOptDetails(comp(), "Guard on node %p has default branch counts: taken=%d, not taken=%d\n", node, *taken, *nottaken); } else if (!cfgNode->asBlock()->isCold()) { @@ -1473,7 +1516,7 @@ J9::CFG::getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, i } */ if (comp()->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp(),"If with no profiling information on node %p has low branch counts: taken=%d, not taken=%d\n", node, *taken, *nottaken); + dumpOptDetails(comp(), "If with no profiling information on node %p has low branch counts: taken=%d, not taken=%d\n", node, *taken, *nottaken); } } @@ -1495,44 +1538,49 @@ J9::CFG::setSwitchEdgeFrequenciesOnNode(TR::CFGNode *node, TR::Compilation *comp { TR::Block *block = node->asBlock(); TR::Node *treeNode = node->asBlock()->getLastRealTreeTop()->getNode(); + /* This and a few other methods here in TR differ from their MJIT counterparts only in whether the TR::Node + used for information about bytecode is in getLastRealTreeTop() or getExit() on the asBlock() of the CFGNode. + Hence, use the following if this needs to be implemented for MicroJIT: + TR::Node *treeNode = node->asBlock()->getExit()->getNode(); + */ int32_t sumFrequency = _externalProfiler->getSumSwitchCount(treeNode, comp); if (sumFrequency < 10) { if (comp->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp,"Low count switch I'll set frequencies using uniform edge distribution\n"); + dumpOptDetails(comp, "Low count switch I'll set frequencies using uniform edge distribution\n"); - self()->setUniformEdgeFrequenciesOnNode (node, sumFrequency, false, comp); + self()->setUniformEdgeFrequenciesOnNode(node, sumFrequency, false, comp); return; } if (treeNode->getInlinedSiteIndex() < -1) { if (comp->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp,"Dummy switch generated in estimate code size I'll set frequencies using uniform edge distribution\n"); + dumpOptDetails(comp, "Dummy switch generated in estimate code size I'll set frequencies using uniform edge distribution\n"); - self()->setUniformEdgeFrequenciesOnNode (node, sumFrequency, false, comp); + self()->setUniformEdgeFrequenciesOnNode(node, sumFrequency, false, comp); return; } if (_externalProfiler->isSwitchProfileFlat(treeNode, comp)) { if (comp->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp,"Flat profile switch, setting average frequency on each case.\n"); + dumpOptDetails(comp, "Flat profile switch, setting average frequency on each case.\n"); self()->setUniformEdgeFrequenciesOnNode(node, _externalProfiler->getFlatSwitchProfileCounts(treeNode, comp), false, comp); return; } - for ( int32_t count=1; count < treeNode->getNumChildren(); count++) + for (int32_t count=1; count < treeNode->getNumChildren(); count++) { TR::Node *child = treeNode->getChild(count); TR::CFGEdge *e = getCFGEdgeForNode(node, child); int32_t frequency = _externalProfiler->getSwitchCountForValue (treeNode, (count-1), comp); - e->setFrequency( std::max(frequency,1) ); + e->setFrequency(std::max(frequency,1)); if (comp->getOption(TR_TraceBFGeneration)) - dumpOptDetails(comp,"Edge %p between %d and %d 
has freq %d (Switch)\n", e, e->getFrom()->getNumber(), e->getTo()->getNumber(), e->getFrequency()); + dumpOptDetails(comp, "Edge %p between %d and %d has freq %d (Switch)\n", e, e->getFrom()->getNumber(), e->getTo()->getNumber(), e->getFrequency()); } } @@ -1707,10 +1755,19 @@ void J9::CFG::propagateFrequencyInfoFromExternalProfiler(TR_ExternalProfiler *profiler) { _externalProfiler = profiler; - +#if defined(J9VM_OPT_MICROJIT) + J9Method *method = static_cast(comp()->getMethodBeingCompiled())->ramMethod(); + if (TR::Options::getJITCmdLineOptions()->_mjitEnabled && comp()->fej9()->isMJITExtendedFlagsMethod(method) && comp()->getMethodBeingCompiled()->isInterpreted()) + { + if (!profiler) + return; + self()->setBlockFrequenciesBasedOnInterpreterProfiler(true); + return; + } +#endif /* J9VM_OPT_MICROJIT */ if (profiler) { - self()->setBlockFrequenciesBasedOnInterpreterProfiler(); + self()->setBlockFrequenciesBasedOnInterpreterProfiler(false); return; } @@ -1763,3 +1820,81 @@ J9::CFG::emitVerbosePseudoRandomFrequencies() comp()->fej9()->emitNewPseudoRandomVerboseSuffix(); return true; } + + +bool +J9::CFG::isBranch(TR_J9ByteCode bc) + { + switch(bc) + { + case J9BCifeq: + case J9BCifne: + case J9BCiflt: + case J9BCifge: + case J9BCifgt: + case J9BCifle: + case J9BCificmpeq: + case J9BCificmpne: + case J9BCificmplt: + case J9BCificmpge: + case J9BCificmpgt: + case J9BCificmple: + case J9BCifacmpeq: + case J9BCifacmpne: + case J9BCifnull: + case J9BCifnonnull: + return true; + default: + return false; + } + } + + +bool +J9::CFG::isTableOp(TR_J9ByteCode bc) + { + switch(bc) + { + case J9BClookupswitch: + case J9BCtableswitch: + return true; + default: + return false; + } + } + + +/* These methods all use MicroJIT, so we cannot make all the assumptions about + * availability of meta-data structures (such as IL Trees) that TR can usually make. + * To that end, there are no real tree tops in this CFG, so we must use the exit + * nodes to get the last bytecode index for a given block, and from that get the bytecode. + * Since this will be done no less than 3 times, a helper function is provided. + */ +TR_J9ByteCode +J9::CFG::getBytecodeFromIndex(int32_t index) + { + TR_ResolvedJ9Method *method = static_cast(_method->getResolvedMethod()); + TR_J9VMBase *fe = comp()->fej9(); + TR_J9ByteCodeIterator bcIterator(_method, method, fe, comp()); + bcIterator.setIndex(index); + return bcIterator.next(); + } + + +TR_J9ByteCode +J9::CFG::getLastRealBytecodeOfBlock(TR::CFGNode *start) + { + return getBytecodeFromIndex(start->asBlock()->getExit()->getNode()->getLocalIndex()); + } + + +bool +J9::CFG::continueLoop(TR::CFGNode *temp) + { + if ((temp->getSuccessors().size() == 1) && !temp->asBlock()->getEntry()) + { + TR_J9ByteCode bc = getLastRealBytecodeOfBlock(temp); + return !(isBranch(bc) || isTableOp(bc)); + } + return false; + } diff --git a/runtime/compiler/infra/J9Cfg.hpp b/runtime/compiler/infra/J9Cfg.hpp index 5f09b558eda..83f514a2839 100644 --- a/runtime/compiler/infra/J9Cfg.hpp +++ b/runtime/compiler/infra/J9Cfg.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2019 IBM Corp. and others + * Copyright (c) 2000, 2022 IBM Corp. 
and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
@@ -39,6 +39,7 @@ namespace J9 { typedef J9::CFG CFGConnector; }
 #include "cs2/listof.h"
 #include "env/TRMemory.hpp"
 #include "il/Node.hpp"
+#include "ilgen/J9ByteCode.hpp"
 #include "infra/Assert.hpp"
 #include "infra/List.hpp"
 #include "infra/TRCfgEdge.hpp"
@@ -86,10 +87,10 @@ class CFG : public OMR::CFGConnector
    void setBlockAndEdgeFrequenciesBasedOnStructure();
    TR_BitVector *setBlockAndEdgeFrequenciesBasedOnJITProfiler();
-   void setBlockFrequenciesBasedOnInterpreterProfiler();
+   void setBlockFrequenciesBasedOnInterpreterProfiler(bool mjit); // mjit set to true for MicroJIT compilations; false otherwise
    void computeInitialBlockFrequencyBasedOnExternalProfiler(TR::Compilation *comp);
    void propagateFrequencyInfoFromExternalProfiler(TR_ExternalProfiler *profiler);
-   void getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, int32_t *taken, int32_t *nottaken);
+   void getInterpreterProfilerBranchCountersOnDoubleton(TR::CFGNode *cfgNode, int32_t *taken, int32_t *nottaken, bool mjitFlag);
    void setSwitchEdgeFrequenciesOnNode(TR::CFGNode *node, TR::Compilation *comp);
    void setBlockFrequency(TR::CFGNode *node, int32_t frequency, bool addFrequency = false);
    int32_t scanForFrequencyOnSimpleMethod(TR::TreeTop *tt, TR::TreeTop *endTT);
@@ -98,6 +99,11 @@ class CFG : public OMR::CFGConnector
    void getBranchCountersFromProfilingData(TR::Node *node, TR::Block *block, int32_t *taken, int32_t *notTaken);
    bool emitVerbosePseudoRandomFrequencies();
+   bool isBranch(TR_J9ByteCode bc);
+   bool isTableOp(TR_J9ByteCode bc);
+   TR_J9ByteCode getBytecodeFromIndex(int32_t index); // Helper for getting bytecode from an index
+   TR_J9ByteCode getLastRealBytecodeOfBlock(TR::CFGNode *start); // Helper for getting the last bytecode in the CFGNode's block
+   bool continueLoop(TR::CFGNode *temp);
 protected:
diff --git a/runtime/compiler/microjit/CMakeLists.txt b/runtime/compiler/microjit/CMakeLists.txt
new file mode 100644
index 00000000000..bebab4ae699
--- /dev/null
+++ b/runtime/compiler/microjit/CMakeLists.txt
@@ -0,0 +1,41 @@
+################################################################################
+# Copyright (c) 2022, 2022 IBM Corp. and others
+#
+# This program and the accompanying materials are made available under
+# the terms of the Eclipse Public License 2.0 which accompanies this
+# distribution and is available at https://www.eclipse.org/legal/epl-2.0/
+# or the Apache License, Version 2.0 which accompanies this distribution and
+# is available at https://www.apache.org/licenses/LICENSE-2.0.
+#
+# This Source Code may also be made available under the following
+# Secondary Licenses when the conditions for such availability set
+# forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
+# General Public License, version 2 with the GNU Classpath
+# Exception [1] and GNU General Public License, version 2 with the
+# OpenJDK Assembly Exception [2].
+# +# [1] https://www.gnu.org/software/classpath/license.html +# [2] http://openjdk.java.net/legal/assembly-exception.html +# +# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception +################################################################################ +if(J9VM_OPT_MJIT_Standalone) +j9jit_files( + microjit/ExceptionTable.cpp + microjit/CompilationInfoPerThreadBase.cpp + microjit/assembly/utils.nasm + microjit/LWMetaDataCreation.cpp +) +else() +j9jit_files( + microjit/ExceptionTable.cpp + microjit/CompilationInfoPerThreadBase.cpp + microjit/assembly/utils.nasm + microjit/HWMetaDataCreation.cpp +) +endif() +if(TR_HOST_ARCH STREQUAL "x") + if(TR_TARGET_ARCH STREQUAL "x") + add_subdirectory(x) + endif() +endif() diff --git a/runtime/compiler/microjit/CompilationInfoPerThreadBase.cpp b/runtime/compiler/microjit/CompilationInfoPerThreadBase.cpp new file mode 100644 index 00000000000..51b1b58d0e1 --- /dev/null +++ b/runtime/compiler/microjit/CompilationInfoPerThreadBase.cpp @@ -0,0 +1,362 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at http://eclipse.org/legal/epl-2.0 + * or the Apache License, Version 2.0 which accompanies this distribution + * and is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception [1] and GNU General Public + * License, version 2 with the OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ + +#include "control/CompilationThread.hpp" +#include "microjit/x/amd64/AMD64Codegen.hpp" +#include "control/CompilationRuntime.hpp" +#include "compile/Compilation.hpp" +#include "codegen/OMRCodeGenerator.hpp" +#include "env/VerboseLog.hpp" +#include "runtime/CodeCacheTypes.hpp" +#include "runtime/CodeCache.hpp" +#include "runtime/CodeCacheManager.hpp" + +#if defined(J9VM_OPT_MICROJIT) + +static void +printCompFailureInfo(TR::Compilation * comp, const char * reason) + { + if (comp && comp->getOptions()->getAnyOption(TR_TraceAll)) + traceMsg(comp, "\n=== EXCEPTION THROWN (%s) ===\n", reason); + } + +// MicroJIT returns a code size of 0 when it encounters a compilation error +// We must use this to set the correct values and fail compilation, this is +// the same no matter which phase of compilation fails. +// This function will bubble the exception up to the caller after clean up. 
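+// For illustration, a typical call site later in this file checks a generator's return
+// value like so (the reason string shown is the one used at that call site):
+//
+//    buffer_size_t code_size = mjit_cg.generatePrologue(...);
+//    testMicroJITCompilationForErrors((uintptr_t)code_size, method, jitConfig, vmThread, compiler, "generatePrologue failed");
+//
+// On failure this resets the invocation count stored in method->extra so the method can
+// be compiled later, sets J9_MJIT_FAILED_COMPILE in the method's extended flags, and
+// throws through failCompilation() into the catch block of mjit().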
+static void +testMicroJITCompilationForErrors( + uintptr_t code_size, + J9Method *method, + J9JITConfig *jitConfig, + J9VMThread *vmThread, + TR::Compilation *compiler, + const char *reason) + { + if (0 == code_size) + { + J9ROMMethod *romMethod = J9_ROM_METHOD_FROM_RAM_METHOD(method); + intptr_t trCountFromMJIT = J9ROMMETHOD_HAS_BACKWARDS_BRANCHES(romMethod) ? TR_DEFAULT_INITIAL_BCOUNT : TR_DEFAULT_INITIAL_COUNT; + trCountFromMJIT = (trCountFromMJIT << 1) | 1; + TR_J9VMBase *fe = TR_J9VMBase::get(jitConfig, vmThread); + uint8_t *extendedFlags = fe->fetchMethodExtendedFlagsPointer(method); + *extendedFlags = *extendedFlags | J9_MJIT_FAILED_COMPILE; + method->extra = reinterpret_cast(trCountFromMJIT); + compiler->failCompilation("Cannot compile with MicroJIT because %s.", reason); + } + } + +// This routine should only be called from wrappedCompile +TR_MethodMetaData * +TR::CompilationInfoPerThreadBase::mjit( + J9VMThread *vmThread, + TR::Compilation *compiler, + TR_ResolvedMethod *compilee, + TR_J9VMBase &vm, + TR_OptimizationPlan *optimizationPlan, + TR::SegmentAllocator const &scratchSegmentProvider, + TR_Memory *trMemory + ) + { + + PORT_ACCESS_FROM_JITCONFIG(jitConfig); + + TR_MethodMetaData *metaData = NULL; + J9Method *method = NULL; + TR::CodeCache *codeCache = NULL; + + if (_methodBeingCompiled->_priority >= CP_SYNC_MIN) + ++_compInfo._numSyncCompilations; + else + ++_compInfo._numAsyncCompilations; + + if (_methodBeingCompiled->isDLTCompile()) + testMicroJITCompilationForErrors(0, method, jitConfig, vmThread, compiler, "MicroJIT does not support DLT compilations"); + + bool volatile haveLockedClassUnloadMonitor = false; // used for compilation without VM access + int32_t estimated_size = 0; + char* buffer = NULL; + try + { + + InterruptibleOperation compilingMethodBody(*this); + + TR::IlGeneratorMethodDetails & details = _methodBeingCompiled->getMethodDetails(); + method = details.getMethod(); + + uint8_t *extendedFlags = static_cast (&vm)->fetchMethodExtendedFlagsPointer(method); + const char* signature = compilee->signature(compiler->trMemory()); + + TRIGGER_J9HOOK_JIT_COMPILING_START(_jitConfig->hookInterface, vmThread, method); + + // BEGIN MICROJIT + TR::FilePointer *logFileFP = getCompilation()->getOutFile(); + TR_J9ByteCodeIterator bcIterator(0, static_cast (compilee), static_cast (&vm), comp()); + + if (TR::Options::canJITCompile()) + { + TR::Options *options = TR::Options::getJITCmdLineOptions(); + + J9ROMMethod *romMethod = J9_ROM_METHOD_FROM_RAM_METHOD(method); + U_16 maxLength = J9UTF8_LENGTH(J9ROMMETHOD_SIGNATURE(romMethod)); + if (compiler->getOption(TR_TraceCG)) + trfprintf(logFileFP, "\n------------------------------\nMicroJIT Compiling %s...\n------------------------------\n", signature); + if ((compilee->isConstructor())) + testMicroJITCompilationForErrors(0, method, jitConfig, vmThread, compiler, "MicroJIT does not currently compile constructors"); + + bool isStaticMethod = compilee->isStatic(); // To know if the method we are compiling is static or not + if (!isStaticMethod) + maxLength += 1; // Make space for an extra character for object reference in case of non-static methods + + char typeString[maxLength]; + if (MJIT::nativeSignature(method, typeString)) + return 0; + + // To insert 'L' for object reference with non-static methods + // before the input arguments starting at index 3. 
+ // The last iteration sets typeString[3] to typeString[2], + // which is always MJIT::CLASSNAME_TYPE_CHARACTER, i.e., 'L' + // and other iterations just move the characters one index down. + // e.g. xxIIB -> + if (!isStaticMethod) + { + for (int i = maxLength - 1; i > 2 ; i--) + typeString[i] = typeString[i-1]; + } + + U_16 paramCount = MJIT::getParamCount(typeString, maxLength); + U_16 actualParamCount = MJIT::getActualParamCount(typeString, maxLength); + MJIT::ParamTableEntry paramTableEntries[actualParamCount*2]; + + for (int i=0; imaxBytecodeIndex(); + + MJIT::CodeGenGC mjitCGGC(logFileFP); + MJIT::CodeGenerator mjit_cg(_jitConfig, vmThread, logFileFP, vm, ¶mTable, compiler, &mjitCGGC, comp()->getPersistentInfo(), trMemory, compilee); + estimated_size = MAX_BUFFER_SIZE(maxBCI); + buffer = (char*)mjit_cg.allocateCodeCache(estimated_size, &vm, vmThread); + testMicroJITCompilationForErrors((uintptr_t)buffer, method, jitConfig, vmThread, compiler, "code cache allocation failed"); + codeCache = mjit_cg.getCodeCache(); + + // provide enough space for CodeCacheMethodHeader + char *cursor = buffer; + + buffer_size_t buffer_size = 0; + + char *magicWordLocation, *first2BytesPatchLocation, *samplingRecompileCallLocation; + buffer_size_t code_size = mjit_cg.generatePrePrologue(cursor, method, &magicWordLocation, &first2BytesPatchLocation, &samplingRecompileCallLocation); + testMicroJITCompilationForErrors((uintptr_t)code_size, method, jitConfig, vmThread, compiler, "generatePrePrologue failed"); + + compiler->cg()->setPrePrologueSize(code_size); + buffer_size += code_size; + MJIT_ASSERT(logFileFP, buffer_size < MAX_BUFFER_SIZE(maxBCI), "Buffer overflow after pre-prologue"); + +#ifdef MJIT_DEBUG + trfprintf(logFileFP, "\ngeneratePrePrologue\n"); + for (int32_t i = 0; i < code_size; i++) + trfprintf(logFileFP, "%02x\n", ((unsigned char)cursor[i]) & (unsigned char)0xff); + +#endif + cursor += code_size; + + // start point should point to prolog + char *prologue_address = cursor; + + // generate debug breakpoint + if (comp()->getOption(TR_EntryBreakPoints)) + { + code_size = mjit_cg.generateDebugBreakpoint(cursor); + cursor += code_size; + buffer_size += code_size; + } + + char *jitStackOverflowPatchLocation = NULL; + + mjit_cg.setPeakStackSize(romMethod->maxStack * mjit_cg.getPointerSize()); + char *firstInstructionLocation = NULL; + + code_size = mjit_cg.generatePrologue(cursor, method, &jitStackOverflowPatchLocation, magicWordLocation, first2BytesPatchLocation, samplingRecompileCallLocation, &firstInstructionLocation, &bcIterator); + testMicroJITCompilationForErrors((uintptr_t)code_size, method, jitConfig, vmThread, compiler, "generatePrologue failed"); + + TR::GCStackAtlas *atlas = mjit_cg.getStackAtlas(); + compiler->cg()->setStackAtlas(atlas); + compiler->cg()->setMethodStackMap(atlas->getLocalMap()); + // TODO: MicroJIT: Find out why setting this correctly causes the startPC to report the jitToJit startPC and not the interpreter entry point + // compiler->cg()->setJitMethodEntryPaddingSize((uint32_t)(firstInstructionLocation-cursor)); + compiler->cg()->setJitMethodEntryPaddingSize((uint32_t)(0)); + + buffer_size += code_size; + MJIT_ASSERT(logFileFP, buffer_size < MAX_BUFFER_SIZE(maxBCI), "Buffer overflow after prologue"); + + if (compiler->getOption(TR_TraceCG)) + { + trfprintf(logFileFP, "\ngeneratePrologue\n"); + for (int32_t i = 0; i < code_size; i++) + trfprintf(logFileFP, "%02x\n", ((unsigned char)cursor[i]) & (unsigned char)0xff); + } + + cursor += code_size; + + // GENERATE BODY + 
bcIterator.setIndex(0); + + TR_Debug dbg(getCompilation()); + getCompilation()->setDebug(&dbg); + + if (compiler->getOption(TR_TraceCG)) + trfprintf(logFileFP, "\n%s\n", signature); + + code_size = mjit_cg.generateBody(cursor, &bcIterator); + testMicroJITCompilationForErrors((uintptr_t)code_size, method, jitConfig, vmThread, compiler, "generateBody failed"); + + buffer_size += code_size; + MJIT_ASSERT(logFileFP, buffer_size < MAX_BUFFER_SIZE(maxBCI), "Buffer overflow after body"); + +#ifdef MJIT_DEBUG + trfprintf(logFileFP, "\ngenerateBody\n"); + for (int32_t i = 0; i < code_size; i++) + trfprintf(logFileFP, "%02x\n", ((unsigned char)cursor[i]) & (unsigned char)0xff); +#endif + + cursor += code_size; + // END GENERATE BODY + + // GENERATE COLD AREA + code_size = mjit_cg.generateColdArea(cursor, method, jitStackOverflowPatchLocation); + testMicroJITCompilationForErrors((uintptr_t)code_size, method, jitConfig, vmThread, compiler, "generateColdArea failed"); + + buffer_size += code_size; + MJIT_ASSERT(logFileFP, buffer_size < MAX_BUFFER_SIZE(maxBCI), "Buffer overflow after cold-area"); + +#ifdef MJIT_DEBUG + trfprintf(logFileFP, "\ngenerateColdArea\n"); + for (int32_t i = 0; i < code_size; i++) + trfprintf(logFileFP, "%02x\n", ((unsigned char)cursor[i]) & (unsigned char)0xff); + + trfprintf(logFileFP, "\nfinal method\n"); + for (int32_t i = 0; i < buffer_size; i++) + trfprintf(logFileFP, "%02x\n", ((unsigned char)buffer[i]) & (unsigned char)0xff); +#endif + + cursor += code_size; + + // As the body is finished, mark its profile info as active so that the JProfiler thread will inspect it + + // TODO: MicroJIT: after adding profiling support, uncomment this. + // if (bodyInfo && bodyInfo->getProfileInfo()) + // { + // bodyInfo->getProfileInfo()->setActive(); + // } + + // Put a metaData pointer into the Code Cache Header(s). + compiler->cg()->setBinaryBufferCursor((uint8_t*)(cursor)); + compiler->cg()->setBinaryBufferStart((uint8_t*)(buffer)); + if (compiler->getOption(TR_TraceCG)) + trfprintf(logFileFP, "Compiled method binary buffer finalized [" POINTER_PRINTF_FORMAT " : " POINTER_PRINTF_FORMAT "] for %s @ %s", ((void*)buffer), ((void*)cursor), compiler->signature(), compiler->getHotnessName()); + + metaData = createMJITMethodMetaData(vm, compilee, compiler); + if (!metaData) + { + if (TR::Options::getVerboseOption(TR_VerboseCompilationDispatch)) + TR_VerboseLog::writeLineLocked(TR_Vlog_DISPATCH, "Failed to create metadata for %s @ %s", compiler->signature(), compiler->getHotnessName()); + compiler->failCompilation("Metadata creation failure"); + } + if (TR::Options::getVerboseOption(TR_VerboseCompilationDispatch)) + TR_VerboseLog::writeLineLocked(TR_Vlog_DISPATCH, "Successfully created metadata [" POINTER_PRINTF_FORMAT "] for %s @ %s", metaData, compiler->signature(), compiler->getHotnessName()); + setMetadata(metaData); + uint8_t *warmMethodHeader = compiler->cg()->getBinaryBufferStart() - sizeof(OMR::CodeCacheMethodHeader); + reinterpret_cast(warmMethodHeader)->_metaData = metaData; + // FAR: should we do postpone this copying until after CHTable commit? 
+ metaData->runtimeAssumptionList = *(compiler->getMetadataAssumptionList()); + + TR_CHTable *chTable = compiler->getCHTable(); + TR_ASSERT_FATAL(!chTable || chTable->canSkipCommit(compiler), "MicroJIT should not use CHTable"); + + if (prologue_address && compiler->getOption(TR_TraceCG)) + trfprintf(logFileFP, "\nMJIT:%s\n", compilee->signature(compiler->trMemory())); + + compiler->cg()->getCodeCache()->trimCodeMemoryAllocation(buffer, (cursor-buffer)); + + if (compiler->getOption(TR_TraceCG)) + trfprintf(logFileFP, "\n------------------------------\nMicroJIT Compiled %s Successfully!\n------------------------------\n", signature); + logCompilationSuccess(vmThread, vm, method, scratchSegmentProvider, compilee, compiler, metaData, optimizationPlan); + } + // END MICROJIT + + TRIGGER_J9HOOK_JIT_COMPILING_END(_jitConfig->hookInterface, vmThread, method); + } + catch (const std::exception &e) + { + const char *exceptionName; + +#if defined(J9ZOS390) + // Compiling with -Wc,lp64 results in a crash on z/OS when trying + // to call the what() virtual method of the exception. + exceptionName = "std::exception"; +#else + exceptionName = e.what(); +#endif + + printCompFailureInfo(compiler, exceptionName); + processException(vmThread, scratchSegmentProvider, compiler, haveLockedClassUnloadMonitor, exceptionName); + if (codeCache) + { + if (estimated_size && buffer) + { + codeCache->trimCodeMemoryAllocation(buffer, 1); + } + TR::CodeCacheManager::instance()->unreserveCodeCache(codeCache); + } + metaData = 0; + } + + // At this point the compilation has either succeeded and compilation cannot be + // interrupted anymore, or it has failed. In either case _compilationShouldBeInterrupted flag + // is not needed anymore + setCompilationShouldBeInterrupted(0); + + // We should not have the classTableMutex at this point + TR_ASSERT_FATAL(!TR::MonitorTable::get()->getClassTableMutex()->owned_by_self(), + "Should not still own classTableMutex"); + + // Increment the number of JIT compilations (either successful or not) + // performed by this compilation thread + incNumJITCompilations(); + + return metaData; + } +#endif /* J9VM_OPT_MICROJIT */ diff --git a/runtime/compiler/microjit/ExceptionTable.cpp b/runtime/compiler/microjit/ExceptionTable.cpp new file mode 100644 index 00000000000..9ca96442f41 --- /dev/null +++ b/runtime/compiler/microjit/ExceptionTable.cpp @@ -0,0 +1,83 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at http://eclipse.org/legal/epl-2.0 + * or the Apache License, Version 2.0 which accompanies this distribution + * and is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception [1] and GNU General Public + * License, version 2 with the OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#include +#include "microjit/ExceptionTable.hpp" +#include "compile/Compilation.hpp" +#include "env/TRMemory.hpp" +#include "env/jittypes.h" +#include "il/Block.hpp" +#include "il/Node.hpp" +#include "il/TreeTop.hpp" +#include "il/TreeTop_inlines.hpp" +#include "infra/Array.hpp" +#include "infra/Assert.hpp" +#include "infra/List.hpp" +#include "infra/CfgEdge.hpp" + +class TR_ResolvedMethod; + +// TODO: MicroJIT: This should create an exception table using only information MicroJIT has +MJIT::ExceptionTableEntryIterator::ExceptionTableEntryIterator(TR::Compilation *comp) + : _compilation(comp) + { + int32_t i = 1; + _tableEntries = (TR_Array > *)comp->trMemory()->allocateHeapMemory(sizeof(TR_Array >)*i); + for (int32_t j = 0; j < i; ++j) + _tableEntries[j].init(comp->trMemory()); + + // lookup catch blocks + // + + // for each exception block: + // create exception ranges from the list of exception predecessors + + } + +// TODO: MicroJIT: This should create an exception table entry using only information MicroJIT has +void +MJIT::ExceptionTableEntryIterator::addSnippetRanges( + List & tableEntries, + TR::Block *snippetBlock, + TR::Block *catchBlock, + uint32_t catchType, + TR_ResolvedMethod *method, + TR::Compilation *comp) + { + // TODO: MicroJIT: Create a snippet for each exception and add it to the correct place + } + +TR_ExceptionTableEntry * +MJIT::ExceptionTableEntryIterator::getFirst() + { + return NULL; + } + +TR_ExceptionTableEntry * +MJIT::ExceptionTableEntryIterator::getNext() + { + return NULL; + } + +uint32_t +MJIT::ExceptionTableEntryIterator::size() + { + return 0; + } diff --git a/runtime/compiler/microjit/ExceptionTable.hpp b/runtime/compiler/microjit/ExceptionTable.hpp new file mode 100644 index 00000000000..118765abbf2 --- /dev/null +++ b/runtime/compiler/microjit/ExceptionTable.hpp @@ -0,0 +1,61 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at http://eclipse.org/legal/epl-2.0 + * or the Apache License, Version 2.0 which accompanies this distribution + * and is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception [1] and GNU General Public + * License, version 2 with the OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ + +#ifndef MJIT_EXCEPTIONTABLE_INCL +#define MJIT_EXCEPTIONTABLE_INCL + +#include +#include "infra/Array.hpp" +#include "env/TRMemory.hpp" +#include "env/jittypes.h" +#include "infra/List.hpp" +#include "env/ExceptionTable.hpp" + +class TR_ResolvedMethod; +namespace TR { class Block; } +namespace TR { class Compilation; } +template class TR_Array; + +namespace MJIT +{ + +struct ExceptionTableEntryIterator + { + ExceptionTableEntryIterator(TR::Compilation *comp); + + TR_ExceptionTableEntry *getFirst(); + TR_ExceptionTableEntry *getNext(); + + uint32_t size(); + + private: + void addSnippetRanges(List &, TR::Block *, TR::Block *, uint32_t, TR_ResolvedMethod *, TR::Compilation *); + + TR::Compilation * _compilation; + TR_Array > * _tableEntries; + ListIterator _entryIterator; + int32_t _inlineDepth; + uint32_t _handlerIndex; + }; + +} // namespace MJIT + +#endif /* MJIT_EXCEPTIONTABLE_INCL */ diff --git a/runtime/compiler/microjit/HWMetaDataCreation.cpp b/runtime/compiler/microjit/HWMetaDataCreation.cpp new file mode 100644 index 00000000000..4c746ac7b2e --- /dev/null +++ b/runtime/compiler/microjit/HWMetaDataCreation.cpp @@ -0,0 +1,717 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at http://eclipse.org/legal/epl-2.0 + * or the Apache License, Version 2.0 which accompanies this distribution + * and is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception [1] and GNU General Public + * License, version 2 with the OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ + +#include +#include "il/Block.hpp" +#include "il/J9Node_inlines.hpp" +#include "infra/Cfg.hpp" +#include "microjit/MethodBytecodeMetaData.hpp" + +#if defined(J9VM_OPT_MJIT_Standalone) +/* do nothing */ +#else + +namespace MJIT +{ + +static void +setupNode( + TR::Node *node, + uint32_t bcIndex, + TR_ResolvedMethod *feMethod, + TR::Compilation *comp) + { + node->getByteCodeInfo().setDoNotProfile(0); + node->setByteCodeIndex(bcIndex); + node->setInlinedSiteIndex(-10); + node->setMethod(feMethod->getPersistentIdentifier()); + } + +static TR::Block * +makeBlock( + TR::Compilation *comp, + TR_ResolvedMethod *feMethod, + uint32_t start, + uint32_t end, + TR::CFG &cfg) + { + TR::TreeTop *startTree = TR::TreeTop::create(comp, TR::Node::create(NULL, TR::BBStart, 0)); + TR::TreeTop *endTree = TR::TreeTop::create(comp, TR::Node::create(NULL, TR::BBEnd, 0)); + startTree->join(endTree); + TR::Block *block = TR::Block::createBlock(startTree, endTree, cfg); + block->setBlockBCIndex(start); + block->setNumber(cfg.getNextNodeNumber()); + setupNode(startTree->getNode(), start, feMethod, comp); + setupNode(endTree->getNode(), end, feMethod, comp); + cfg.addNode(block); + return block; + } + +static void +join( + TR::CFG &cfg, + TR::Block *src, + TR::Block *dst, + bool isBranchDest) + { + TR::CFGEdge *e = cfg.addEdge(src, dst); + + // TODO: MicroJIT: add case for switch + if (isBranchDest) + { + src->getExit()->getNode()->setBranchDestination(dst->getEntry()); + if (dst->getEntry()->getNode()->getByteCodeIndex() < src->getExit()->getNode()->getByteCodeIndex()) + src->setBranchesBackwards(); + } + else + { + src->getExit()->join(dst->getEntry()); + } + src->addSuccessor(e); + dst->addPredecessor(e); + } + +class BytecodeRange + { + + public: + int32_t _start; + int32_t _end; + BytecodeRange(int32_t start) + :_start(start) + ,_end(-1) + {} + inline bool isClosed() + { + return _end > _start; + } + BytecodeRange() = delete; + }; + +class BytecodeRangeList + { + + private: + BytecodeRange *_range; + + public: + TR_ALLOC(TR_Memory::UnknownType); // TODO: MicroJIT: create a memory object type for this and get it added to OMR + BytecodeRangeList *prev; + BytecodeRangeList *next; + BytecodeRangeList(BytecodeRange *range) + :_range(range) + ,next(NULL) + ,prev(NULL) + {} + inline uint32_t getStart() + { + return _range->_start; + } + inline uint32_t getEnd() + { + return _range->_end; + } + inline BytecodeRangeList *insert(BytecodeRangeList *newListEntry) + { + if (this == newListEntry || next == newListEntry) + return this; + while (_range->_start < newListEntry->getStart()) + { + if (next) + { + next->insert(newListEntry); + return this; + } + else + { // end of list + next = newListEntry; + newListEntry->prev = this; + newListEntry->next = NULL; + return this; + } + } + MJIT_ASSERT_NO_MSG(_range->_start != newListEntry->getStart()); + if (prev) + prev->next = newListEntry; + newListEntry->next = this; + newListEntry->prev = prev; + prev = newListEntry; + return newListEntry; + } + }; + +class BytecodeIndexList + { + + private: + BytecodeIndexList *_prev; + BytecodeIndexList *_next; + TR::CFG &_cfg; + + public: + TR_ALLOC(TR_Memory::UnknownType); // TODO: MicroJIT: create 
a memory object type for this and get it added to OMR + uint32_t _index; + BytecodeIndexList(uint32_t index, BytecodeIndexList *prev, BytecodeIndexList *next, TR::CFG &cfg) + :_index(index) + ,_prev(prev) + ,_next(next) + ,_cfg(cfg) + {} + BytecodeIndexList() = delete; + BytecodeIndexList *appendBCI(uint32_t i) + { + if (i == _index) + return this; + if (_next) + return _next->appendBCI(i); + BytecodeIndexList *newIndex = new (_cfg.getInternalMemoryRegion()) BytecodeIndexList(i, this, _next, _cfg); + _next = newIndex; + return newIndex; + } + BytecodeIndexList *getBCIListEntry(uint32_t i) + { + if (i == _index) + { + return this; + } + else if (i < _index) + { + return _prev->getBCIListEntry(i); + } + else if (_next && _next->_index <= i) + { // (i > _index) + return _next->getBCIListEntry(i); + } + else + { + BytecodeIndexList *newIndex = new (_cfg.getInternalMemoryRegion()) BytecodeIndexList(i, this, _next, _cfg); + return newIndex; + } + } + inline uint32_t getPreviousIndex() + { + return _prev->_index; + } + inline BytecodeIndexList *getPrev() + { + return _prev; + } + BytecodeIndexList *getTail() + { + if (_next) + return _next->getTail(); + return this; + } + inline void cutNext() + { + if (!_next) + return; + _next->_prev = NULL; + _next = NULL; + } + }; + +class BlockList + { + + private: + BlockList *_next; + BlockList *_prev; + BytecodeRangeList *_successorList; + uint32_t _successorCount; + TR::CFG &_cfg; + BlockList** _tail; + BytecodeIndexList *_bcListHead; + BytecodeIndexList *_bcListCurrent; + public: + TR_ALLOC(TR_Memory::UnknownType) // TODO: MicroJIT: create a memory object type for this and get it added to OMR + BytecodeRange _range; + TR::Block *_block; + + BlockList(TR::CFG &cfg, uint32_t start, BlockList** tail) + :_next(NULL) + ,_prev(NULL) + ,_successorList(NULL) + ,_successorCount(0) + ,_cfg(cfg) + ,_tail(tail) + ,_bcListHead(NULL) + ,_bcListCurrent(NULL) + ,_range(start) + ,_block(NULL) + { + _bcListHead = new (_cfg.getInternalMemoryRegion()) BytecodeIndexList(start, NULL, NULL, _cfg); + _bcListCurrent = _bcListHead; + } + + inline void addBCIndex(uint32_t index) + { + _bcListCurrent = _bcListCurrent->appendBCI(index); + } + + inline void setBCListHead(BytecodeIndexList *list) + { + _bcListHead = list; + } + + inline void setBCListCurrent(BytecodeIndexList *list) + { + _bcListCurrent = list; + } + + inline void close() + { + _range._end = _bcListCurrent->_index; + } + + inline void insertAfter(BlockList *bl) + { + MJIT_ASSERT_NO_MSG(bl); + bl->_next = _next; + bl->_prev = this; + if (_next) + _next->_prev = bl; + _next = bl; + if (this == *_tail) + *_tail = bl; + } + + inline void addSuccessor(BytecodeRange *successor) + { + BytecodeRangeList *brl = new (_cfg.getInternalMemoryRegion()) BytecodeRangeList(successor); + if (_successorList) + _successorList = _successorList->insert(brl); + else + _successorList = brl; + _successorCount++; + // Until we support switch and other multiple end point bytecodes this should never go over 2 + MJIT_ASSERT_NO_MSG(_successorCount > 0 && _successorCount <= 2); + } + + BlockList *getBlockListByStartIndex(uint32_t index) + { + // NOTE always start searching from a block with a lower start than the index + MJIT_ASSERT_NO_MSG(_range._start <= index); + + // If this is the target block return it + if (_range._start == index) + return this; + if (_range._start < index && _next && _next->_range._start > index) + return this; + + // If the branch is after this block, and there is more list to search, then continue searching + if (_next) 
+ return _next->getBlockListByStartIndex(index); + + // There was no block in the BlockList for this index + return NULL; + } + + BlockList *getOrMakeBlockListByBytecodeIndex(uint32_t index) + { + // NOTE always start searching from a block with a lower start than the index + MJIT_ASSERT_NO_MSG(_range._start <= index); + + // If this is the target block return it + if (_range._start == index) + return this; + + // If the branch targets the middle of this block (which may be under construction) split it on that index + if (_range.isClosed() && index > _range._start && index <= _range._end) + return splitBlockListOnIndex(index); + if (!_range.isClosed() && _next && index > _range._start && index < _next->_range._start) + return splitBlockListOnIndex(index); + + // If the branch is after this block, and there is more list to search, then continue searching + if (_next && index > _next->_range._start) + return _next->getOrMakeBlockListByBytecodeIndex(index); + + // The target isn't before this block, isn't this block, isn't in this block, and there is no more list to search. + // Time to make a new block and add it to the list + BlockList *target = new (_cfg.getInternalMemoryRegion()) BlockList(_cfg, index, _tail); + insertAfter(target); + return target; + } + + BlockList *splitBlockListOnIndex(uint32_t index) + { + BlockList *target = new (_cfg.getInternalMemoryRegion()) BlockList(_cfg, index, _tail); + insertAfter(target); + + BytecodeIndexList *splitPoint = _bcListHead->getBCIListEntry(index); + target->setBCListHead(splitPoint); + target->setBCListCurrent(splitPoint->getTail()); + + _range._end = splitPoint->getPreviousIndex(); + _bcListCurrent = splitPoint->getPrev(); + _bcListCurrent->cutNext(); + + return target; + } + + BlockList *getNext() + { + return _next; + } + + BlockList *getPrev() + { + return _prev; + } + + inline uint32_t getSuccessorCount() + { + return _successorCount; + } + + inline void getDestinations(uint32_t *destinations) + { + BytecodeRangeList *range = _successorList; + for (uint32_t i=0; i<_successorCount; i++) + { + destinations[i] = range->getStart(); + range = _successorList->next; + } + } + }; + +class CFGCreator + { + + private: + BlockList *_head; + BlockList *_current; + BlockList *_tail; + bool _newBlock; + bool _fallThrough; + uint32_t _blockCount; + TR::CFG &_cfg; + TR_ResolvedMethod *_compilee; + TR::Compilation *_comp; + + public: + CFGCreator(TR::CFG &cfg, TR_ResolvedMethod *compilee, TR::Compilation *comp) + :_head(NULL) + ,_current(NULL) + ,_tail(NULL) + ,_newBlock(false) + ,_fallThrough(true) + ,_blockCount(0) + ,_cfg(cfg) + ,_compilee(compilee) + ,_comp(comp) + {} + + void addBytecodeIndexToCFG(TR_J9ByteCodeIterator *bci) + { + // Just started parsing, make the block list. + if (!_head) + { + _head = new (_cfg.getInternalMemoryRegion()) BlockList(_cfg, bci->currentByteCodeIndex(), &_tail); + _tail = _head; + _current = _head; + } + + // Given current state and current bytecode, update BlockList + BlockList *nextBlock = _head->getBlockListByStartIndex(bci->currentByteCodeIndex()); + + // If we have a block for this (possibly from a forward jump) + // we should use that block, and close the current one. + // If the current block is the right block to work on, then don't close it. 
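+        // For illustration (hypothetical indices): an ificmpeq at bytecode index 2 that
+        // branches to index 10 creates the BlockList entry for index 10 early, via
+        // getOrMakeBlockListByBytecodeIndex() in the isBranch() case below. When the
+        // iterator later reaches index 10, the lookup above returns that entry, so the
+        // block under construction is closed and _current switches to it instead of
+        // opening a duplicate block.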
+ if (nextBlock && nextBlock != _current) + { + _current->close(); + _current = _head->getBlockListByStartIndex(bci->currentByteCodeIndex()); + // If we had thought we needed a new block, override it + _newBlock = false; + _fallThrough = true; + } + + if (_newBlock) + { // Current block has ended in a branch, this is a new block now + nextBlock = new (_cfg.getInternalMemoryRegion()) BlockList(_cfg, bci->currentByteCodeIndex(), &_tail); + _current->insertAfter(nextBlock); + if(_fallThrough) + _current->addSuccessor(&(nextBlock->_range)); + _newBlock = false; + _fallThrough = true; + _current = nextBlock; + } + else if (bci->isBranch()) + { // Get the target block, make it if it isn't there already + _current->_range._end = bci->currentByteCodeIndex(); + nextBlock = _head->getOrMakeBlockListByBytecodeIndex(bci->branchDestination(bci->currentByteCodeIndex())); + if (bci->current() == J9BCgoto || bci->current() == J9BCgotow) + _fallThrough = false; + _current->addSuccessor(&(nextBlock->_range)); + _newBlock = true; + } + else + { // Still working on current block + // TODO: MicroJIT: determine if there are any edge cases that need to be filled in here + } + _current->addBCIndex(bci->currentByteCodeIndex()); + } + + void buildCFG() + { + // Make the blocks + BlockList *temp = _head; + while (temp != NULL) + { + uint32_t start = temp->_range._start; + uint32_t end = temp->_range._end; + temp->_block = makeBlock(_comp, _compilee, start, end, _cfg); + temp = temp->getNext(); + } + + // Use successor lists to build CFG + temp = _head; + while (temp != NULL) + { + uint32_t successorCount = temp->getSuccessorCount(); + uint32_t destinations[successorCount]; + temp->getDestinations(destinations); + for (uint32_t i=0; igetBlockListByStartIndex(destinations[i]); + join(_cfg, temp->_block, dst->_block, (dst == temp->getNext())); + } + temp = temp->getNext(); + } + _cfg.setStart(_head->_block); + } + }; + +static TR::Optimizer *createOptimizer(TR::Compilation *comp, TR::ResolvedMethodSymbol *methodSymbol) + { + return new (comp->trHeapMemory()) TR::Optimizer(comp, methodSymbol, true, J9::Optimizer::microJITOptimizationStrategy(comp)); + } + +MJIT::InternalMetaData +createInternalMethodMetadata( + TR_J9ByteCodeIterator *bci, + MJIT::LocalTableEntry *localTableEntries, + U_16 entries, + int32_t offsetToFirstLocal, + TR_ResolvedMethod *compilee, + TR::Compilation *comp, + MJIT::ParamTable *paramTable, + uint8_t pointerSize, + bool *resolvedAllCallees) + { + int32_t totalSize = 0; + + int32_t localIndex = 0; // Indexes are 0 based and positive + int32_t lastParamSlot = 0; + int32_t gcMapOffset = 0; // Offset will always be greater than 0 + U_16 size = 0; // Sizes are all multiples of 8 and greater than 0 + bool isRef = false; // No good default here, both options are valid. 
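+    // These scratch values are refilled for each local-variable bytecode in the walk below
+    // and are turned into a LocalTableEntry at the MakeEntry label via
+    // makeLocalTableEntry(localIndex, gcMapOffset, size, isRef).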
+ + uint16_t maxCalleeArgsSize = 0; + + // TODO: MicroJIT: Create the CFG before here and pass reference + bool profilingCompilation = comp->getOption(TR_EnableJProfiling) || comp->getProfilingMode() == JProfiling; + + TR::CFG *cfg; + if (profilingCompilation) + cfg = new (comp->trHeapMemory()) TR::CFG(comp, compilee->findOrCreateJittedMethodSymbol(comp)); + + CFGCreator creator(*cfg, compilee, comp); + ParamTableEntry entry; + for (int i=0; igetParamCount(); i+=entry.slots) + { + MJIT_ASSERT_NO_MSG(paramTable->getEntry(i, &entry)); + size = entry.slots*pointerSize; + localTableEntries[i] = makeLocalTableEntry(i, totalSize + offsetToFirstLocal, size, entry.isReference); + totalSize += size; + lastParamSlot += localTableEntries[i].slots; + } + + for (TR_J9ByteCode bc = bci->first(); bc != J9BCunknown; bc = bci->next()) + { + /* It's okay to overwrite entries here. + * We are not storing whether the entry is used for a + * load or a store because it could be both. Identifying entries + * that we have already created would require zeroing the + * array before use and/or remembering which entries we've created. + * Both might be as much work as just recreating entries, depending + * on how many times a local is stored/loaded during a method. + * This may be worth doing later, but requires some research first. + * + * TODO: MicroJIT: Arguments are not guaranteed to be used in the bytecode of a method + * e.g. class MyClass { public int ret3() {return 3;} } + * The above method is not a static method (arg 0 is an object reference) but the BC + * will be [iconst_3, return]. + * We must find a way to take the parameter table in and create the required entries + * in the local table. + */ + + if (profilingCompilation) + creator.addBytecodeIndexToCFG(bci); + gcMapOffset = 0; + isRef = false; + switch (bc) + { + MakeEntry: + gcMapOffset = offsetToFirstLocal + totalSize; + localTableEntries[localIndex] = makeLocalTableEntry(localIndex, gcMapOffset, size, isRef); + totalSize += size; + break; + case J9BCiload: + case J9BCistore: + case J9BCfstore: + case J9BCfload: + localIndex = (int32_t)bci->nextByte(); + size = pointerSize; + goto MakeEntry; + case J9BCiload0: + case J9BCistore0: + case J9BCfload0: + case J9BCfstore0: + localIndex = 0; + size = pointerSize; + goto MakeEntry; + case J9BCiload1: + case J9BCistore1: + case J9BCfstore1: + case J9BCfload1: + localIndex = 1; + size = pointerSize; + goto MakeEntry; + case J9BCiload2: + case J9BCistore2: + case J9BCfstore2: + case J9BCfload2: + localIndex = 2; + size = pointerSize; + goto MakeEntry; + case J9BCiload3: + case J9BCistore3: + case J9BCfstore3: + case J9BCfload3: + localIndex = 3; + size = pointerSize; + goto MakeEntry; + case J9BClstore: + case J9BClload: + case J9BCdstore: + case J9BCdload: + localIndex = (int32_t)bci->nextByte(); + size = pointerSize*2; + goto MakeEntry; + case J9BClstore0: + case J9BClload0: + case J9BCdload0: + case J9BCdstore0: + localIndex = 0; + size = pointerSize*2; + goto MakeEntry; + case J9BClstore1: + case J9BCdstore1: + case J9BCdload1: + case J9BClload1: + localIndex = 1; + size = pointerSize*2; + goto MakeEntry; + case J9BClstore2: + case J9BClload2: + case J9BCdstore2: + case J9BCdload2: + localIndex = 2; + size = pointerSize*2; + goto MakeEntry; + case J9BClstore3: + case J9BClload3: + case J9BCdstore3: + case J9BCdload3: + localIndex = 3; + size = pointerSize*2; + goto MakeEntry; + case J9BCaload: + case J9BCastore: + localIndex = (int32_t)bci->nextByte(); + size = pointerSize; + isRef = true; + goto 
MakeEntry; + case J9BCaload0: + case J9BCastore0: + localIndex = 0; + size = pointerSize; + isRef = true; + goto MakeEntry; + case J9BCaload1: + case J9BCastore1: + localIndex = 1; + size = pointerSize; + isRef = true; + goto MakeEntry; + case J9BCaload2: + case J9BCastore2: + localIndex = 2; + size = pointerSize; + isRef = true; + goto MakeEntry; + case J9BCaload3: + case J9BCastore3: + localIndex = 3; + size = pointerSize; + isRef = true; + goto MakeEntry; + case J9BCinvokestatic: + { + int32_t cpIndex = (int32_t)bci->next2Bytes(); + bool isUnresolvedInCP; + TR_ResolvedMethod *resolved = compilee->getResolvedStaticMethod(comp, cpIndex, &isUnresolvedInCP); + if (!resolved) + { + *resolvedAllCallees = false; + break; + } + J9Method *ramMethod = static_cast(resolved)->ramMethod(); + J9ROMMethod *romMethod = J9_ROM_METHOD_FROM_RAM_METHOD(ramMethod); + // TODO: MicroJIT: replace this with a more accurate count; this is a clear upper bound + uint16_t calleeArgsSize = romMethod->argCount * pointerSize * 2; // assume 2 slots for now + maxCalleeArgsSize = (calleeArgsSize > maxCalleeArgsSize) ? calleeArgsSize : maxCalleeArgsSize; + } + default: + break; + } + } + + if(profilingCompilation) + { + creator.buildCFG(); + comp->getMethodSymbol()->setFlowGraph(cfg); + TR::Optimizer *optimizer = createOptimizer(comp, comp->getMethodSymbol()); + comp->setOptimizer(optimizer); + optimizer->optimize(); + } + + MJIT::LocalTable localTable(localTableEntries, entries); + + MJIT::InternalMetaData internalMetaData(localTable, cfg, maxCalleeArgsSize); + return internalMetaData; + } + +} // namespace MJIT +#endif /* TR_MJIT_Interop */ diff --git a/runtime/compiler/microjit/LWMetaDataCreation.cpp b/runtime/compiler/microjit/LWMetaDataCreation.cpp new file mode 100644 index 00000000000..d3c304c4be6 --- /dev/null +++ b/runtime/compiler/microjit/LWMetaDataCreation.cpp @@ -0,0 +1,34 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at http://eclipse.org/legal/epl-2.0 + * or the Apache License, Version 2.0 which accompanies this distribution + * and is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception [1] and GNU General Public + * License, version 2 with the OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ + +#include "microjit/MethodBytecodeMetaData.hpp" + +/* HWMetaDataCreation is for heavyweight and + LWMetaDataCreation is for lightweight. + Lightweight is meant to be for low-memory environments + where TR would use too many resources (e.g. embedded). 
+ */ + +#if defined(J9VM_OPT_MJIT_Standalone) +// TODO: MicroJIT: Complete this section so that MicroJIT can be used in a future case where MicroJIT won't deploy with TR +#else +/* do nothing */ +#endif /* TR_MJIT_Interop */ diff --git a/runtime/compiler/microjit/MethodBytecodeMetaData.hpp b/runtime/compiler/microjit/MethodBytecodeMetaData.hpp new file mode 100644 index 00000000000..ec28eb8418f --- /dev/null +++ b/runtime/compiler/microjit/MethodBytecodeMetaData.hpp @@ -0,0 +1,69 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at http://eclipse.org/legal/epl-2.0 + * or the Apache License, Version 2.0 which accompanies this distribution + * and is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception [1] and GNU General Public + * License, version 2 with the OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ + +#ifndef MJIT_METHOD_BYTECODE_META_DATA +#define MJIT_METHOD_BYTECODE_META_DATA + +#include "ilgen/J9ByteCodeIterator.hpp" +#include "microjit/SideTables.hpp" + +namespace MJIT +{ + +#if defined(J9VM_OPT_MJIT_Standalone) +class InternalMetaData + { + public: + MJIT::LocalTable _localTable; + InternalMetaData(MJIT::LocalTable table) + :_localTable(table) + {} + }; +#else +class InternalMetaData + { + public: + MJIT::LocalTable _localTable; + TR::CFG *_cfg; + uint16_t _maxCalleeArgSize; + InternalMetaData(MJIT::LocalTable table, TR::CFG *cfg, uint16_t maxCalleeArgSize) + :_localTable(table) + ,_cfg(cfg) + ,_maxCalleeArgSize(maxCalleeArgSize) + {} + }; +#endif + +MJIT::InternalMetaData +createInternalMethodMetadata( + TR_J9ByteCodeIterator *bci, + MJIT::LocalTableEntry *localTableEntries, + U_16 entries, + int32_t offsetToFirstLocal, + TR_ResolvedMethod *compilee, + TR::Compilation *comp, + ParamTable* paramTable, + uint8_t pointerSize, + bool *resolvedAllCallees); + +} // namespace MJIT + +#endif /* MJIT_METHOD_BYTECODE_META_DATA */ diff --git a/runtime/compiler/microjit/SideTables.hpp b/runtime/compiler/microjit/SideTables.hpp new file mode 100644 index 00000000000..7809a195377 --- /dev/null +++ b/runtime/compiler/microjit/SideTables.hpp @@ -0,0 +1,402 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#ifndef MJIT_SIDETABLE_HPP +#define MJIT_SIDETABLE_HPP + +#include +#include "microjit/x/amd64/AMD64Linkage.hpp" +#include "microjit/utils.hpp" + +/* +| Architecture | Endian | Return address | vTable index | +| x86-64 | Little | Stack | R8 (receiver in RAX) | + +| Integer Return value registers | Integer Preserved registers | Integer Argument registers | +| EAX (32-bit) RAX (64-bit) | RBX R9 | RAX RSI RDX RCX | + +| Float Return value registers | Float Preserved registers | Float Argument registers | +| XMM0 | XMM8-XMM15 | XMM0-XMM7 | +*/ + +namespace MJIT +{ + +struct RegisterStack + { + unsigned char useRAX : 2; + unsigned char useRSI : 2; + unsigned char useRDX : 2; + unsigned char useRCX : 2; + unsigned char useXMM0 : 2; + unsigned char useXMM1 : 2; + unsigned char useXMM2 : 2; + unsigned char useXMM3 : 2; + unsigned char useXMM4 : 2; + unsigned char useXMM5 : 2; + unsigned char useXMM6 : 2; + unsigned char useXMM7 : 2; + unsigned char stackSlotsUsed : 8; + }; + +inline U_16 +calculateOffset(RegisterStack *stack) + { + return 8*(stack->useRAX + + stack->useRSI + + stack->useRDX + + stack->useRCX + + stack->useXMM0 + + stack->useXMM1 + + stack->useXMM2 + + stack->useXMM3 + + stack->useXMM4 + + stack->useXMM5 + + stack->useXMM6 + + stack->useXMM7 + + stack->stackSlotsUsed); + } + +inline void +initParamStack(RegisterStack *stack) + { + stack->useRAX = 0; + stack->useRSI = 0; + stack->useRDX = 0; + stack->useRCX = 0; + stack->useXMM0 = 0; + stack->useXMM1 = 0; + stack->useXMM2 = 0; + stack->useXMM3 = 0; + stack->useXMM4 = 0; + stack->useXMM5 = 0; + stack->useXMM6 = 0; + stack->useXMM7 = 0; + stack->stackSlotsUsed = 0; + } + +inline int +addParamIntToStack(RegisterStack *stack, U_16 size) + { + if (!stack->useRAX) + { + stack->useRAX = size/8; + return TR::RealRegister::eax; + } + else if (!stack->useRSI) + { + stack->useRSI = size/8; + return TR::RealRegister::esi; + } + else if (!stack->useRDX) + { + stack->useRDX = size/8; + return TR::RealRegister::edx; + } + else if (!stack->useRCX) + { + stack->useRCX = size/8; + return TR::RealRegister::ecx; + } + else + { + stack->stackSlotsUsed += size/8; + return TR::RealRegister::NoReg; + } + } + +inline int +addParamFloatToStack(RegisterStack *stack, U_16 size) + { + if (!stack->useXMM0) + { + stack->useXMM0 = size/8; + return TR::RealRegister::xmm0; + } + else if (!stack->useXMM1) + { + stack->useXMM1 = size/8; + return TR::RealRegister::xmm1; + } + else if (!stack->useXMM2) + { + stack->useXMM2 = size/8; + return TR::RealRegister::xmm2; + } + else if (!stack->useXMM3) + { + stack->useXMM3 = size/8; + return TR::RealRegister::xmm3; + } + else if (!stack->useXMM4) + { + stack->useXMM4 = size/8; + return TR::RealRegister::xmm4; + } + else if (!stack->useXMM5) + { + stack->useXMM5 = size/8; + return TR::RealRegister::xmm5; + } + else 
if (!stack->useXMM6) + { + stack->useXMM6 = size/8; + return TR::RealRegister::xmm6; + } + else if (!stack->useXMM7) + { + stack->useXMM7 = size/8; + return TR::RealRegister::xmm7; + } + else + { + stack->stackSlotsUsed += size/8; + return TR::RealRegister::NoReg; + } + } + +inline int +removeParamIntFromStack(RegisterStack *stack, U_16 *size) + { + if (stack->useRAX) + { + *size = stack->useRAX*8; + stack->useRAX = 0; + return TR::RealRegister::eax; + } + else if (stack->useRSI) + { + *size = stack->useRSI*8; + stack->useRSI = 0; + return TR::RealRegister::esi; + } + else if (stack->useRDX) + { + *size = stack->useRDX*8; + stack->useRDX = 0; + return TR::RealRegister::edx; + } + else if (stack->useRCX) + { + *size = stack->useRCX*8; + stack->useRCX = 0; + return TR::RealRegister::ecx; + } + else + { + *size = 0; + return TR::RealRegister::NoReg; + } + } + +inline int +removeParamFloatFromStack(RegisterStack *stack, U_16 *size) + { + if (stack->useXMM0) + { + *size = stack->useXMM0*8; + stack->useXMM0 = 0; + return TR::RealRegister::xmm0; + } + else if (stack->useXMM1) + { + *size = stack->useXMM1*8; + stack->useXMM1 = 0; + return TR::RealRegister::xmm1; + } + else if (stack->useXMM2) + { + *size = stack->useXMM2*8; + stack->useXMM2 = 0; + return TR::RealRegister::xmm2; + } + else if (stack->useXMM3) + { + *size = stack->useXMM3*8; + stack->useXMM3 = 0; + return TR::RealRegister::xmm3; + } + else if (stack->useXMM4) + { + *size = stack->useXMM4*8; + stack->useXMM4 = 0; + return TR::RealRegister::xmm4; + } + else if (stack->useXMM5) + { + *size = stack->useXMM5*8; + stack->useXMM5 = 0; + return TR::RealRegister::xmm5; + } + else if (stack->useXMM6) + { + *size = stack->useXMM6*8; + stack->useXMM6 = 0; + return TR::RealRegister::xmm6; + } + else if (stack->useXMM7) + { + *size = stack->useXMM7*8; + stack->useXMM7 = 0; + return TR::RealRegister::xmm7; + } + else + { + *size = 0; + return TR::RealRegister::NoReg; + } + } + +struct ParamTableEntry + { + int32_t offset; + int32_t regNo; + int32_t gcMapOffset; + // Stored here so we can look up when saving to local array + char slots; + char size; + bool isReference; + bool onStack; + bool notInitialized; + }; + +inline MJIT::ParamTableEntry +initializeParamTableEntry() + { + MJIT::ParamTableEntry entry; + entry.regNo = TR::RealRegister::NoReg; + entry.offset = 0; + entry.gcMapOffset = 0; + entry.onStack = false; + entry.slots = 0; + entry.size = 0; + entry.isReference = false; + entry.notInitialized = true; + return entry; + } + + +// This function is only used for paramaters +inline ParamTableEntry +makeRegisterEntry(int32_t regNo, int32_t stackOffset, uint16_t size, uint16_t slots, bool isRef) + { + ParamTableEntry entry; + entry.regNo = regNo; + entry.offset = stackOffset; + entry.gcMapOffset = stackOffset; + entry.onStack = false; + entry.slots = slots; + entry.size = size; + entry.isReference = isRef; + entry.notInitialized = false; + return entry; + } + +// This function is only used for paramaters +inline ParamTableEntry +makeStackEntry(int32_t stackOffset, uint16_t size, uint16_t slots, bool isRef) + { + ParamTableEntry entry; + entry.regNo = TR::RealRegister::NoReg; + entry.offset = stackOffset; + entry.gcMapOffset = stackOffset; + entry.onStack = true; + entry.slots = slots; + entry.size = size; + entry.isReference = isRef; + entry.notInitialized = false; + return entry; + } + +class ParamTable + { + + private: + ParamTableEntry *_tableEntries; + U_16 _paramCount; + U_16 _actualParamCount; + RegisterStack *_registerStack; + + public: + 
ParamTable(ParamTableEntry*, uint16_t, uint16_t, RegisterStack*); + bool getEntry(uint16_t, ParamTableEntry*); + bool setEntry(uint16_t, ParamTableEntry*); + uint16_t getTotalParamSize(); + uint16_t getParamCount(); + uint16_t getActualParamCount(); + }; + +using LocalTableEntry=ParamTableEntry; + +inline LocalTableEntry +makeLocalTableEntry(int32_t gcMapOffset, uint16_t size, uint16_t slots, bool isRef) + { + LocalTableEntry entry; + entry.regNo = TR::RealRegister::NoReg; + entry.offset = -1; // This field isn't used for locals + entry.gcMapOffset = gcMapOffset; + entry.onStack = true; + entry.slots = slots; + entry.size = size; + entry.isReference = isRef; + entry.notInitialized = false; + return entry; + } + +inline LocalTableEntry +initializeLocalTableEntry() + { + ParamTableEntry entry; + entry.regNo = TR::RealRegister::NoReg; + entry.offset = 0; + entry.gcMapOffset = 0; + entry.onStack = false; + entry.slots = 0; + entry.size = 0; + entry.isReference = false; + entry.notInitialized = true; + return entry; + } + +class LocalTable + { + + private: + LocalTableEntry *_tableEntries; + uint16_t _localCount; + + public: + LocalTable(LocalTableEntry*, uint16_t); + bool getEntry(uint16_t, LocalTableEntry*); + uint16_t getTotalLocalSize(); + uint16_t getLocalCount(); + }; + +struct JumpTableEntry + { + uint32_t byteCodeIndex; + char *codeCacheAddress; + JumpTableEntry() {} + JumpTableEntry(uint32_t bci, char* cca) : byteCodeIndex(bci), codeCacheAddress(cca) {} + }; + +} +#endif /* MJIT_SIDETABLE_HPP */ diff --git a/runtime/compiler/microjit/assembly/utils.nasm b/runtime/compiler/microjit/assembly/utils.nasm new file mode 100644 index 00000000000..b19cb9d4dbb --- /dev/null +++ b/runtime/compiler/microjit/assembly/utils.nasm @@ -0,0 +1,30 @@ +; Copyright (c) 2022, 2022 IBM Corp. and others +; +; This program and the accompanying materials are made available under +; the terms of the Eclipse Public License 2.0 which accompanies this +; distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +; or the Apache License, Version 2.0 which accompanies this distribution and +; is available at https://www.apache.org/licenses/LICENSE-2.0. +; +; This Source Code may also be made available under the following +; Secondary Licenses when the conditions for such availability set +; forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +; General Public License, version 2 with the GNU Classpath +; Exception [1] and GNU General Public License, version 2 with the +; OpenJDK Assembly Exception [2]. +; +; [1] https://www.gnu.org/software/classpath/license.html +; [2] http://openjdk.java.net/legal/assembly-exception.html +; +; SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + +%macro template_start 1 +global %1 +global %1 %+ Size +%1: +%endmacro + +%macro template_end 1 +%1 %+ _end: +%1 %+ Size: dw %1 %+ _end - %1 +%endmacro diff --git a/runtime/compiler/microjit/utils.hpp b/runtime/compiler/microjit/utils.hpp new file mode 100644 index 00000000000..e4d9b42a665 --- /dev/null +++ b/runtime/compiler/microjit/utils.hpp @@ -0,0 +1,42 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. 
and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#ifndef MJIT_UTILS_HPP +#define MJIT_UTILS_HPP + +#include +#include +#include "env/IO.hpp" + +inline void MJIT_ASSERT(TR::FilePointer *filePtr, bool condition, const char * const error_string) + { + if (!condition) + { + trfprintf(filePtr, "%s", error_string); + assert(condition); + } + } + +inline void MJIT_ASSERT_NO_MSG(bool condition) + { + assert(condition); + } +#endif /* MJIT_UTILS_HPP */ diff --git a/runtime/compiler/microjit/x/CMakeLists.txt b/runtime/compiler/microjit/x/CMakeLists.txt new file mode 100644 index 00000000000..c0933947552 --- /dev/null +++ b/runtime/compiler/microjit/x/CMakeLists.txt @@ -0,0 +1,24 @@ +################################################################################ +# Copyright (c) 2022, 2022 IBM Corp. and others +# +# This program and the accompanying materials are made available under +# the terms of the Eclipse Public License 2.0 which accompanies this +# distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +# or the Apache License, Version 2.0 which accompanies this distribution and +# is available at https://www.apache.org/licenses/LICENSE-2.0. +# +# This Source Code may also be made available under the following +# Secondary Licenses when the conditions for such availability set +# forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +# General Public License, version 2 with the GNU Classpath +# Exception [1] and GNU General Public License, version 2 with the +# OpenJDK Assembly Exception [2]. +# +# [1] https://www.gnu.org/software/classpath/license.html +# [2] http://openjdk.java.net/legal/assembly-exception.html +# +# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception +################################################################################ +if(TR_HOST_BITS STREQUAL 64) + add_subdirectory(amd64) +endif() diff --git a/runtime/compiler/microjit/x/amd64/AMD64Codegen.cpp b/runtime/compiler/microjit/x/amd64/AMD64Codegen.cpp new file mode 100755 index 00000000000..b61a730b1f1 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/AMD64Codegen.cpp @@ -0,0 +1,3171 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. 
and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#include +#include +#include +#include "j9comp.h" +#include "j9.h" +#include "env/VMJ9.h" +#include "il/Block.hpp" +#include "infra/Cfg.hpp" +#include "compile/Compilation.hpp" +#include "codegen/CodeGenerator.hpp" +#include "control/Recompilation.hpp" +#include "microjit/MethodBytecodeMetaData.hpp" +#include "microjit/x/amd64/AMD64Codegen.hpp" +#include "runtime/CodeCacheManager.hpp" +#include "env/PersistentInfo.hpp" +#include "runtime/J9Runtime.hpp" + +#define MJIT_JITTED_BODY_INFO_PTR_SIZE 8 +#define MJIT_SAVE_AREA_SIZE 2 +#define MJIT_LINKAGE_INFO_SIZE 4 +#define GENERATE_SWITCH_TO_INTERP_PREPROLOGUE 1 + +#define BIT_MASK_32 0xffffffff +#define BIT_MASK_64 0xffffffffffffffff + +// Debug Params, comment/uncomment undef to get omit/enable debugging info +// #define MJIT_DEBUG 1 +#undef MJIT_DEBUG + +#ifdef MJIT_DEBUG +#define MJIT_DEBUG_MAP_PARAMS 1 +#else +#undef MJIT_DEBUG_MAP_PARAMS +#endif +#ifdef MJIT_DEBUG +#define MJIT_DEBUG_BC_WALKING 1 +#define MJIT_DEBUG_BC_LOG(logFile, text) trfprintf(logFile, text) +#else +#undef MJIT_DEBUG_BC_WALKING +#define MJIT_DEBUG_BC_LOG(logFile, text) do {} while(0) +#endif + +#define STACKCHECKBUFFER 512 + +extern J9_CDATA char * const JavaBCNames[]; + +#define COPY_TEMPLATE(buffer, INSTRUCTION, size) \ + do { \ + memcpy(buffer, INSTRUCTION, INSTRUCTION##Size); \ + buffer=(buffer+INSTRUCTION##Size); \ + size += INSTRUCTION##Size; \ + } while (0) + +#define PATCH_RELATIVE_ADDR_8_BIT(buffer, absAddress) \ + do { \ + intptr_t relativeAddress = (intptr_t)(buffer); \ + intptr_t minAddress = (relativeAddress < (intptr_t)(absAddress)) ? relativeAddress : (intptr_t)(absAddress); \ + intptr_t maxAddress = (relativeAddress > (intptr_t)(absAddress)) ? relativeAddress : (intptr_t)(absAddress); \ + intptr_t absDistance = maxAddress - minAddress; \ + MJIT_ASSERT_NO_MSG(absDistance < (intptr_t)0x00000000000000ff); \ + relativeAddress = (relativeAddress < (intptr_t)(absAddress)) ? absDistance : (-1*(intptr_t)(absDistance)); \ + patchImm1(buffer, (U_32)(relativeAddress & 0x00000000000000ff)); \ + } while (0) + +// TODO: MicroJIT: Find out how to jump to trampolines for far calls +#define PATCH_RELATIVE_ADDR_32_BIT(buffer, absAddress) \ + do { \ + intptr_t relativeAddress = (intptr_t)(buffer); \ + intptr_t minAddress = (relativeAddress < (intptr_t)(absAddress)) ? relativeAddress : (intptr_t)(absAddress); \ + intptr_t maxAddress = (relativeAddress > (intptr_t)(absAddress)) ? 
relativeAddress : (intptr_t)(absAddress); \ + intptr_t absDistance = maxAddress - minAddress; \ + MJIT_ASSERT_NO_MSG(absDistance < (intptr_t)0x00000000ffffffff); \ + relativeAddress = (relativeAddress < (intptr_t)(absAddress)) ? absDistance : (-1*(intptr_t)(absDistance)); \ + patchImm4(buffer, (U_32)(relativeAddress & 0x00000000ffffffff)); \ + } while (0) + +// Using char[] to signify that we are treating this +// like data, even though they are instructions. +#define EXTERN_TEMPLATE(TEMPLATE_NAME) extern char TEMPLATE_NAME[] +#define EXTERN_TEMPLATE_SIZE(TEMPLATE_NAME) extern unsigned short TEMPLATE_NAME##Size +#define DECLARE_TEMPLATE(TEMPLATE_NAME) EXTERN_TEMPLATE(TEMPLATE_NAME); \ + EXTERN_TEMPLATE_SIZE(TEMPLATE_NAME) + +// Labels linked from templates. +DECLARE_TEMPLATE(movRSPR10); +DECLARE_TEMPLATE(movR10R14); +DECLARE_TEMPLATE(subR10Imm4); +DECLARE_TEMPLATE(addR10Imm4); +DECLARE_TEMPLATE(addR14Imm4); +DECLARE_TEMPLATE(subR14Imm4); +DECLARE_TEMPLATE(subRSPImm8); +DECLARE_TEMPLATE(subRSPImm4); +DECLARE_TEMPLATE(subRSPImm2); +DECLARE_TEMPLATE(subRSPImm1); +DECLARE_TEMPLATE(addRSPImm8); +DECLARE_TEMPLATE(addRSPImm4); +DECLARE_TEMPLATE(addRSPImm2); +DECLARE_TEMPLATE(addRSPImm1); +DECLARE_TEMPLATE(jbe4ByteRel); +DECLARE_TEMPLATE(cmpRspRbpDerefOffset); +DECLARE_TEMPLATE(loadEBPOffset); +DECLARE_TEMPLATE(loadESPOffset); +DECLARE_TEMPLATE(loadEAXOffset); +DECLARE_TEMPLATE(loadESIOffset); +DECLARE_TEMPLATE(loadEDXOffset); +DECLARE_TEMPLATE(loadECXOffset); +DECLARE_TEMPLATE(loadEBXOffset); +DECLARE_TEMPLATE(loadEBPOffset); +DECLARE_TEMPLATE(loadESPOffset); +DECLARE_TEMPLATE(loadRAXOffset); +DECLARE_TEMPLATE(loadRSIOffset); +DECLARE_TEMPLATE(loadRDXOffset); +DECLARE_TEMPLATE(loadRCXOffset); +DECLARE_TEMPLATE(loadRBXOffset); +DECLARE_TEMPLATE(loadRBPOffset); +DECLARE_TEMPLATE(loadRSPOffset); +DECLARE_TEMPLATE(loadR9Offset); +DECLARE_TEMPLATE(loadR10Offset); +DECLARE_TEMPLATE(loadR11Offset); +DECLARE_TEMPLATE(loadR12Offset); +DECLARE_TEMPLATE(loadR13Offset); +DECLARE_TEMPLATE(loadR14Offset); +DECLARE_TEMPLATE(loadR15Offset); +DECLARE_TEMPLATE(loadXMM0Offset); +DECLARE_TEMPLATE(loadXMM1Offset); +DECLARE_TEMPLATE(loadXMM2Offset); +DECLARE_TEMPLATE(loadXMM3Offset); +DECLARE_TEMPLATE(loadXMM4Offset); +DECLARE_TEMPLATE(loadXMM5Offset); +DECLARE_TEMPLATE(loadXMM6Offset); +DECLARE_TEMPLATE(loadXMM7Offset); +DECLARE_TEMPLATE(saveEAXOffset); +DECLARE_TEMPLATE(saveESIOffset); +DECLARE_TEMPLATE(saveEDXOffset); +DECLARE_TEMPLATE(saveECXOffset); +DECLARE_TEMPLATE(saveEBXOffset); +DECLARE_TEMPLATE(saveEBPOffset); +DECLARE_TEMPLATE(saveESPOffset); +DECLARE_TEMPLATE(saveRAXOffset); +DECLARE_TEMPLATE(saveRSIOffset); +DECLARE_TEMPLATE(saveRDXOffset); +DECLARE_TEMPLATE(saveRCXOffset); +DECLARE_TEMPLATE(saveRBXOffset); +DECLARE_TEMPLATE(saveRBPOffset); +DECLARE_TEMPLATE(saveRSPOffset); +DECLARE_TEMPLATE(saveR9Offset); +DECLARE_TEMPLATE(saveR10Offset); +DECLARE_TEMPLATE(saveR11Offset); +DECLARE_TEMPLATE(saveR12Offset); +DECLARE_TEMPLATE(saveR13Offset); +DECLARE_TEMPLATE(saveR14Offset); +DECLARE_TEMPLATE(saveR15Offset); +DECLARE_TEMPLATE(saveXMM0Offset); +DECLARE_TEMPLATE(saveXMM1Offset); +DECLARE_TEMPLATE(saveXMM2Offset); +DECLARE_TEMPLATE(saveXMM3Offset); +DECLARE_TEMPLATE(saveXMM4Offset); +DECLARE_TEMPLATE(saveXMM5Offset); +DECLARE_TEMPLATE(saveXMM6Offset); +DECLARE_TEMPLATE(saveXMM7Offset); +DECLARE_TEMPLATE(saveXMM0Local); +DECLARE_TEMPLATE(saveXMM1Local); +DECLARE_TEMPLATE(saveXMM2Local); +DECLARE_TEMPLATE(saveXMM3Local); +DECLARE_TEMPLATE(saveXMM4Local); +DECLARE_TEMPLATE(saveXMM5Local); 
+DECLARE_TEMPLATE(saveXMM6Local); +DECLARE_TEMPLATE(saveXMM7Local); +DECLARE_TEMPLATE(saveRAXLocal); +DECLARE_TEMPLATE(saveRSILocal); +DECLARE_TEMPLATE(saveRDXLocal); +DECLARE_TEMPLATE(saveRCXLocal); +DECLARE_TEMPLATE(saveR11Local); +DECLARE_TEMPLATE(callByteRel); +DECLARE_TEMPLATE(call4ByteRel); +DECLARE_TEMPLATE(jump4ByteRel); +DECLARE_TEMPLATE(jumpByteRel); +DECLARE_TEMPLATE(nopInstruction); +DECLARE_TEMPLATE(movRDIImm64); +DECLARE_TEMPLATE(movEDIImm32); +DECLARE_TEMPLATE(movRAXImm64); +DECLARE_TEMPLATE(movEAXImm32); +DECLARE_TEMPLATE(movRSPOffsetR11); +DECLARE_TEMPLATE(jumpRDI); +DECLARE_TEMPLATE(jumpRAX); +DECLARE_TEMPLATE(paintRegister); +DECLARE_TEMPLATE(paintLocal); +DECLARE_TEMPLATE(moveCountAndRecompile); +DECLARE_TEMPLATE(checkCountAndRecompile); +DECLARE_TEMPLATE(loadCounter); +DECLARE_TEMPLATE(decrementCounter); +DECLARE_TEMPLATE(incrementCounter); +DECLARE_TEMPLATE(jgCount); +DECLARE_TEMPLATE(callRetranslateArg1); +DECLARE_TEMPLATE(callRetranslateArg2); +DECLARE_TEMPLATE(callRetranslate); +DECLARE_TEMPLATE(setCounter); +DECLARE_TEMPLATE(jmpToBody); + +// bytecodes +DECLARE_TEMPLATE(debugBreakpoint); +DECLARE_TEMPLATE(aloadTemplatePrologue); +DECLARE_TEMPLATE(iloadTemplatePrologue); +DECLARE_TEMPLATE(lloadTemplatePrologue); +DECLARE_TEMPLATE(loadTemplate); +DECLARE_TEMPLATE(astoreTemplate); +DECLARE_TEMPLATE(istoreTemplate); +DECLARE_TEMPLATE(lstoreTemplate); +DECLARE_TEMPLATE(popTemplate); +DECLARE_TEMPLATE(pop2Template); +DECLARE_TEMPLATE(swapTemplate); +DECLARE_TEMPLATE(dupTemplate); +DECLARE_TEMPLATE(dupx1Template); +DECLARE_TEMPLATE(dupx2Template); +DECLARE_TEMPLATE(dup2Template); +DECLARE_TEMPLATE(dup2x1Template); +DECLARE_TEMPLATE(dup2x2Template); +DECLARE_TEMPLATE(getFieldTemplatePrologue); +DECLARE_TEMPLATE(intGetFieldTemplate); +DECLARE_TEMPLATE(longGetFieldTemplate); +DECLARE_TEMPLATE(floatGetFieldTemplate); +DECLARE_TEMPLATE(doubleGetFieldTemplate); +DECLARE_TEMPLATE(intPutFieldTemplatePrologue); +DECLARE_TEMPLATE(addrPutFieldTemplatePrologue); +DECLARE_TEMPLATE(longPutFieldTemplatePrologue); +DECLARE_TEMPLATE(floatPutFieldTemplatePrologue); +DECLARE_TEMPLATE(doublePutFieldTemplatePrologue); +DECLARE_TEMPLATE(intPutFieldTemplate); +DECLARE_TEMPLATE(longPutFieldTemplate); +DECLARE_TEMPLATE(floatPutFieldTemplate); +DECLARE_TEMPLATE(doublePutFieldTemplate); +DECLARE_TEMPLATE(invokeStaticTemplate); +DECLARE_TEMPLATE(staticTemplatePrologue); +DECLARE_TEMPLATE(intGetStaticTemplate); +DECLARE_TEMPLATE(addrGetStaticTemplate); +DECLARE_TEMPLATE(addrGetStaticTemplatePrologue); +DECLARE_TEMPLATE(longGetStaticTemplate); +DECLARE_TEMPLATE(floatGetStaticTemplate); +DECLARE_TEMPLATE(doubleGetStaticTemplate); +DECLARE_TEMPLATE(intPutStaticTemplate); +DECLARE_TEMPLATE(addrPutStaticTemplatePrologue); +DECLARE_TEMPLATE(addrPutStaticTemplate); +DECLARE_TEMPLATE(longPutStaticTemplate); +DECLARE_TEMPLATE(floatPutStaticTemplate); +DECLARE_TEMPLATE(doublePutStaticTemplate); +DECLARE_TEMPLATE(iAddTemplate); +DECLARE_TEMPLATE(iSubTemplate); +DECLARE_TEMPLATE(iMulTemplate); +DECLARE_TEMPLATE(iDivTemplate); +DECLARE_TEMPLATE(iRemTemplate); +DECLARE_TEMPLATE(iNegTemplate); +DECLARE_TEMPLATE(iShlTemplate); +DECLARE_TEMPLATE(iShrTemplate); +DECLARE_TEMPLATE(iUshrTemplate); +DECLARE_TEMPLATE(iAndTemplate); +DECLARE_TEMPLATE(iOrTemplate); +DECLARE_TEMPLATE(iXorTemplate); +DECLARE_TEMPLATE(i2lTemplate); +DECLARE_TEMPLATE(l2iTemplate); +DECLARE_TEMPLATE(i2bTemplate); +DECLARE_TEMPLATE(i2sTemplate); +DECLARE_TEMPLATE(i2cTemplate); +DECLARE_TEMPLATE(i2dTemplate); +DECLARE_TEMPLATE(l2dTemplate); 
+DECLARE_TEMPLATE(d2iTemplate); +DECLARE_TEMPLATE(d2lTemplate); +DECLARE_TEMPLATE(iconstm1Template); +DECLARE_TEMPLATE(iconst0Template); +DECLARE_TEMPLATE(iconst1Template); +DECLARE_TEMPLATE(iconst2Template); +DECLARE_TEMPLATE(iconst3Template); +DECLARE_TEMPLATE(iconst4Template); +DECLARE_TEMPLATE(iconst5Template); +DECLARE_TEMPLATE(bipushTemplate); +DECLARE_TEMPLATE(sipushTemplatePrologue); +DECLARE_TEMPLATE(sipushTemplate); +DECLARE_TEMPLATE(iIncTemplate_01_load); +DECLARE_TEMPLATE(iIncTemplate_02_add); +DECLARE_TEMPLATE(iIncTemplate_03_store); +DECLARE_TEMPLATE(lAddTemplate); +DECLARE_TEMPLATE(lSubTemplate); +DECLARE_TEMPLATE(lMulTemplate); +DECLARE_TEMPLATE(lDivTemplate); +DECLARE_TEMPLATE(lRemTemplate); +DECLARE_TEMPLATE(lNegTemplate); +DECLARE_TEMPLATE(lShlTemplate); +DECLARE_TEMPLATE(lShrTemplate); +DECLARE_TEMPLATE(lUshrTemplate); +DECLARE_TEMPLATE(lAndTemplate); +DECLARE_TEMPLATE(lOrTemplate); +DECLARE_TEMPLATE(lXorTemplate); +DECLARE_TEMPLATE(lconst0Template); +DECLARE_TEMPLATE(lconst1Template); +DECLARE_TEMPLATE(fAddTemplate); +DECLARE_TEMPLATE(fSubTemplate); +DECLARE_TEMPLATE(fMulTemplate); +DECLARE_TEMPLATE(fDivTemplate); +DECLARE_TEMPLATE(fRemTemplate); +DECLARE_TEMPLATE(fNegTemplate); +DECLARE_TEMPLATE(fconst0Template); +DECLARE_TEMPLATE(fconst1Template); +DECLARE_TEMPLATE(fconst2Template); +DECLARE_TEMPLATE(dAddTemplate); +DECLARE_TEMPLATE(dSubTemplate); +DECLARE_TEMPLATE(dMulTemplate); +DECLARE_TEMPLATE(dDivTemplate); +DECLARE_TEMPLATE(dRemTemplate); +DECLARE_TEMPLATE(dNegTemplate); +DECLARE_TEMPLATE(dconst0Template); +DECLARE_TEMPLATE(dconst1Template); +DECLARE_TEMPLATE(i2fTemplate); +DECLARE_TEMPLATE(f2iTemplate); +DECLARE_TEMPLATE(l2fTemplate); +DECLARE_TEMPLATE(f2lTemplate); +DECLARE_TEMPLATE(d2fTemplate); +DECLARE_TEMPLATE(f2dTemplate); +DECLARE_TEMPLATE(eaxReturnTemplate); +DECLARE_TEMPLATE(raxReturnTemplate); +DECLARE_TEMPLATE(xmm0ReturnTemplate); +DECLARE_TEMPLATE(retTemplate_add); +DECLARE_TEMPLATE(vReturnTemplate); +DECLARE_TEMPLATE(moveeaxForCall); +DECLARE_TEMPLATE(moveesiForCall); +DECLARE_TEMPLATE(moveedxForCall); +DECLARE_TEMPLATE(moveecxForCall); +DECLARE_TEMPLATE(moveraxRefForCall); +DECLARE_TEMPLATE(moversiRefForCall); +DECLARE_TEMPLATE(moverdxRefForCall); +DECLARE_TEMPLATE(movercxRefForCall); +DECLARE_TEMPLATE(moveraxForCall); +DECLARE_TEMPLATE(moversiForCall); +DECLARE_TEMPLATE(moverdxForCall); +DECLARE_TEMPLATE(movercxForCall); +DECLARE_TEMPLATE(movexmm0ForCall); +DECLARE_TEMPLATE(movexmm1ForCall); +DECLARE_TEMPLATE(movexmm2ForCall); +DECLARE_TEMPLATE(movexmm3ForCall); +DECLARE_TEMPLATE(moveDxmm0ForCall); +DECLARE_TEMPLATE(moveDxmm1ForCall); +DECLARE_TEMPLATE(moveDxmm2ForCall); +DECLARE_TEMPLATE(moveDxmm3ForCall); +DECLARE_TEMPLATE(retTemplate_sub); +DECLARE_TEMPLATE(loadeaxReturn); +DECLARE_TEMPLATE(loadraxReturn); +DECLARE_TEMPLATE(loadxmm0Return); +DECLARE_TEMPLATE(loadDxmm0Return); +DECLARE_TEMPLATE(lcmpTemplate); +DECLARE_TEMPLATE(fcmplTemplate); +DECLARE_TEMPLATE(fcmpgTemplate); +DECLARE_TEMPLATE(dcmplTemplate); +DECLARE_TEMPLATE(dcmpgTemplate); +DECLARE_TEMPLATE(gotoTemplate); +DECLARE_TEMPLATE(ifneTemplate); +DECLARE_TEMPLATE(ifeqTemplate); +DECLARE_TEMPLATE(ifltTemplate); +DECLARE_TEMPLATE(ifleTemplate); +DECLARE_TEMPLATE(ifgtTemplate); +DECLARE_TEMPLATE(ifgeTemplate); +DECLARE_TEMPLATE(ificmpeqTemplate); +DECLARE_TEMPLATE(ificmpneTemplate); +DECLARE_TEMPLATE(ificmpltTemplate); +DECLARE_TEMPLATE(ificmpleTemplate); +DECLARE_TEMPLATE(ificmpgeTemplate); +DECLARE_TEMPLATE(ificmpgtTemplate); +DECLARE_TEMPLATE(ifacmpeqTemplate); 
+DECLARE_TEMPLATE(ifacmpneTemplate); +DECLARE_TEMPLATE(ifnullTemplate); +DECLARE_TEMPLATE(ifnonnullTemplate); +DECLARE_TEMPLATE(decompressReferenceTemplate); +DECLARE_TEMPLATE(compressReferenceTemplate); +DECLARE_TEMPLATE(decompressReference1Template); +DECLARE_TEMPLATE(compressReference1Template); +DECLARE_TEMPLATE(addrGetFieldTemplatePrologue); +DECLARE_TEMPLATE(addrGetFieldTemplate); + + + +static void +debug_print_hex(char *buffer, unsigned long long size) + { + printf("Start of dump:\n"); + while (size--) + printf("%x ", 0xc0 & *(buffer++)); + printf("\nEnd of dump:"); + } + +inline uint32_t +align(uint32_t number, uint32_t requirement, TR::FilePointer *fp) + { + MJIT_ASSERT(fp, requirement && ((requirement & (requirement -1)) == 0), "INCORRECT ALIGNMENT"); + return (number + requirement - 1) & ~(requirement - 1); + } + +static bool +getRequiredAlignment(uintptr_t cursor, uintptr_t boundary, uintptr_t margin, uintptr_t *alignment) + { + if ((boundary & (boundary-1)) != 0) + return true; + *alignment = (-cursor - margin) & (boundary-1); + return false; + } + +static intptr_t +getHelperOrTrampolineAddress(TR_RuntimeHelper h, uintptr_t callsite) + { + uintptr_t helperAddress = (uintptr_t)runtimeHelperValue(h); + uintptr_t minAddress = (callsite < (uintptr_t)(helperAddress)) ? callsite : (uintptr_t)(helperAddress); + uintptr_t maxAddress = (callsite > (uintptr_t)(helperAddress)) ? callsite : (uintptr_t)(helperAddress); + uintptr_t distance = maxAddress - minAddress; + if (distance > 0x00000000ffffffff) + helperAddress = (uintptr_t)TR::CodeCacheManager::instance()->findHelperTrampoline(h, (void *)callsite); + return helperAddress; + } + +bool +MJIT::nativeSignature(J9Method *method, char *resultBuffer) + { + J9UTF8 *methodSig; + UDATA arg; + U_16 i, ch; + BOOLEAN parsingReturnType = FALSE, processingBracket = FALSE; + char nextType = '\0'; + + methodSig = J9ROMMETHOD_SIGNATURE(J9_ROM_METHOD_FROM_RAM_METHOD(method)); + i = 0; + arg = 3; /* skip the return type slot and JNI standard slots, they will be filled in later. 
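+       For example, assuming the MJIT type characters mirror the JVM
+       descriptor characters (as the switch below implies, e.g. 'I' for int,
+       'J' for long, 'L' for a class name), a method with signature (IJ)V
+       fills resultBuffer as { 'V', 'L', 'L', 'I', 'J', '\0' }: slot 0 holds
+       the return type and slots 1 and 2 stand in for the JNIEnv and the
+       jobject/jclass.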
*/ + + while (i < J9UTF8_LENGTH(methodSig)) + { + ch = J9UTF8_DATA(methodSig)[i++]; + switch (ch) + { + case '(': /* start of signature -- skip */ + continue; + case ')': /* End of signature -- done args, find return type */ + parsingReturnType = TRUE; + continue; + case MJIT::CLASSNAME_TYPE_CHARACTER: + nextType = MJIT::CLASSNAME_TYPE_CHARACTER; + while(J9UTF8_DATA(methodSig)[i++] != ';') {} /* a type string - loop scanning for ';' to end it - i points past ';' when done loop */ + break; + case MJIT::BOOLEAN_TYPE_CHARACTER: + nextType = MJIT::BOOLEAN_TYPE_CHARACTER; + break; + case MJIT::BYTE_TYPE_CHARACTER: + nextType = MJIT::BYTE_TYPE_CHARACTER; + break; + case MJIT::CHAR_TYPE_CHARACTER: + nextType = MJIT::CHAR_TYPE_CHARACTER; + break; + case MJIT::SHORT_TYPE_CHARACTER: + nextType = MJIT::SHORT_TYPE_CHARACTER; + break; + case MJIT::INT_TYPE_CHARACTER: + nextType = MJIT::INT_TYPE_CHARACTER; + break; + case MJIT::LONG_TYPE_CHARACTER: + nextType = MJIT::LONG_TYPE_CHARACTER; + break; + case MJIT::FLOAT_TYPE_CHARACTER: + nextType = MJIT::FLOAT_TYPE_CHARACTER; + break; + case MJIT::DOUBLE_TYPE_CHARACTER: + nextType = MJIT::DOUBLE_TYPE_CHARACTER; + break; + case '[': + processingBracket = TRUE; + continue; /* go back to top of loop for next char */ + case MJIT::VOID_TYPE_CHARACTER: + if (!parsingReturnType) + return true; + nextType = MJIT::VOID_TYPE_CHARACTER; + break; + default: + nextType = '\0'; + return true; + } + if (processingBracket) + { + if (parsingReturnType) + { + resultBuffer[0] = MJIT::CLASSNAME_TYPE_CHARACTER; + break; /* from the while loop */ + } + else + { + resultBuffer[arg] = MJIT::CLASSNAME_TYPE_CHARACTER; + arg++; + processingBracket = FALSE; + } + } + else if (parsingReturnType) + { + resultBuffer[0] = nextType; + break; /* from the while loop */ + } + else + { + resultBuffer[arg] = nextType; + arg++; + } + } + + resultBuffer[1] = MJIT::CLASSNAME_TYPE_CHARACTER; /* the JNIEnv */ + resultBuffer[2] = MJIT::CLASSNAME_TYPE_CHARACTER; /* the jobject or jclass */ + resultBuffer[arg] = '\0'; + return false; + } + +/** + * Assuming that 'buffer' is the end of the immediate value location, + * this function replaces the value supplied value 'imm' + * + * Assumes 64-bit value + */ +static void +patchImm8(char *buffer, U_64 imm) + { + *(((unsigned long long int*)buffer)-1) = imm; + } + +/** + * Assuming that 'buffer' is the end of the immediate value location, + * this function replaces the value supplied value 'imm' + * + * Assumes 32-bit value + */ +static void +patchImm4(char *buffer, U_32 imm) + { + *(((unsigned int*)buffer)-1) = imm; + } + +/** + * Assuming that 'buffer' is the end of the immediate value location, + * this function replaces the value supplied value 'imm' + * + * Assumes 16-bit value + */ +static void +patchImm2(char *buffer, U_16 imm) + { + *(((unsigned short int*)buffer)-1) = imm; + } + +/** + * Assuming that 'buffer' is the end of the immediate value location, + * this function replaces the value supplied value 'imm' + * + * Assumes 8-bit value + */ +static void +patchImm1(char *buffer, U_8 imm) + { + *(unsigned char*)(buffer-1) = imm; + } + +/* +| Architecture | Endian | Return address | vTable index | +| x86-64 | Little | Stack | R8 (receiver in RAX) | + +| Integer Return value registers | Integer Preserved registers | Integer Argument registers | +| EAX (32-bit) RAX (64-bit) | RBX R9 RSP | RAX RSI RDX RCX | + +| Long Return value registers | Long Preserved registers | Long Argument registers | +| RAX (64-bit) | RBX R9 RSP | RAX RSI RDX RCX | + +| Float 
Return value registers | Float Preserved registers | Float Argument registers | +| XMM0 | XMM8-XMM15 | XMM0-XMM7 | + +| Double Return value registers | Double Preserved registers | Double Argument registers | +| XMM0 | XMM8-XMM15 | XMM0-XMM7 | +*/ + +inline buffer_size_t +savePreserveRegisters(char *buffer, int32_t offset) + { + buffer_size_t size = 0; + COPY_TEMPLATE(buffer, saveRBXOffset, size); + patchImm4(buffer, offset); + COPY_TEMPLATE(buffer, saveR9Offset, size); + patchImm4(buffer, offset-8); + COPY_TEMPLATE(buffer, saveRSPOffset, size); + patchImm4(buffer, offset-16); + return size; + } + +inline buffer_size_t +loadPreserveRegisters(char *buffer, int32_t offset) + { + buffer_size_t size = 0; + COPY_TEMPLATE(buffer, loadRBXOffset, size); + patchImm4(buffer, offset); + COPY_TEMPLATE(buffer, loadR9Offset, size); + patchImm4(buffer, offset-8); + COPY_TEMPLATE(buffer, loadRSPOffset, size); + patchImm4(buffer, offset-16); + return size; + } + +#define COPY_IMM1_TEMPLATE(buffer, size, value) \ + do { \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + patchImm1(buffer, (U_8)(value)); \ + } while (0) + +#define COPY_IMM2_TEMPLATE(buffer, size, value) \ + do { \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + patchImm2(buffer, (U_16)(value)); \ + } while (0) + +#define COPY_IMM4_TEMPLATE(buffer, size, value) \ + do { \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + patchImm4(buffer, (U_32)(value)); \ + } while (0) + +#define COPY_IMM8_TEMPLATE(buffer, size, value) \ + do { \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + COPY_TEMPLATE(buffer, nopInstruction, size); \ + patchImm8(buffer, (U_64)(value)); \ + } while (0) + +MJIT::RegisterStack +MJIT::mapIncomingParams( + char *typeString, + U_16 maxLength, + int *error_code, + ParamTableEntry *paramTable, + U_16 actualParamCount, + TR::FilePointer *logFileFP) + { + MJIT::RegisterStack stack; + initParamStack(&stack); + + uint16_t index = 0; + intptr_t offset = 0; + + for (int i = 0; i= paramIndex) ? false : (paramIndex < _paramCount)); + if (success) + *entry = _tableEntries[paramIndex]; + return success; + } + +bool +MJIT::ParamTable::setEntry(U_16 paramIndex, MJIT::ParamTableEntry *entry) + { + bool success = ((-1 >= paramIndex) ? false : (paramIndex < _paramCount)); + if (success) + _tableEntries[paramIndex] = *entry; + return success; + } + +U_16 +MJIT::ParamTable::getTotalParamSize() + { + return calculateOffset(_registerStack); + } + +U_16 +MJIT::ParamTable::getParamCount() + { + return _paramCount; + } + +U_16 +MJIT::ParamTable::getActualParamCount() + { + return _actualParamCount; + } + +MJIT::LocalTable::LocalTable(LocalTableEntry *tableEntries, U_16 localCount) + : _tableEntries(tableEntries) + , _localCount(localCount) + { + + } + +bool +MJIT::LocalTable::getEntry(U_16 localIndex, MJIT::LocalTableEntry *entry) + { + bool success = ((-1 >= localIndex) ? 
false : (localIndex < _localCount)); + if (success) + *entry = _tableEntries[localIndex]; + return success; + } + +U_16 +MJIT::LocalTable::getTotalLocalSize() + { + U_16 localSize = 0; + for (int i=0; i<_localCount; i++) + localSize += _tableEntries[i].slots*8; + return localSize; + } + +U_16 +MJIT::LocalTable::getLocalCount() + { + return _localCount; + } + +buffer_size_t +MJIT::CodeGenerator::generateGCR( + char *buffer, + int32_t initialCount, + J9Method *method, + uintptr_t startPC) + { + // see TR_J9ByteCodeIlGenerator::prependGuardedCountForRecompilation(TR::Block * originalFirstBlock) + // guardBlock: if (!countForRecompile) goto originalFirstBlock; + // bumpCounterBlock: count--; + // if (bodyInfo->count > 0) goto originalFirstBlock; + // callRecompileBlock: call jitRetranslateCallerWithPreparation(j9method, startPC); + // bodyInfo->count=10000 + // goto originalFirstBlock + // bumpCounter: count + // originalFirstBlock: ... + // + buffer_size_t size = 0; + + // MicroJIT either needs its own guard, or it needs to not have a guard block + // The following is kept here so that if a new guard it + /* + COPY_TEMPLATE(buffer, moveCountAndRecompile, size); + char *guardBlock = buffer; + COPY_TEMPLATE(buffer, checkCountAndRecompile, size); + char *guardBlockJump = buffer; + */ + + // bumpCounterBlock + COPY_TEMPLATE(buffer, loadCounter, size); + char *counterLoader = buffer; + COPY_TEMPLATE(buffer, decrementCounter, size); + char *counterStore = buffer; + COPY_TEMPLATE(buffer, jgCount, size); + char *bumpCounterBlockJump = buffer; + + // callRecompileBlock + COPY_TEMPLATE(buffer, callRetranslateArg1, size); + char *retranslateArg1Patch = buffer; + COPY_TEMPLATE(buffer, callRetranslateArg2, size); + char *retranslateArg2Patch = buffer; + COPY_TEMPLATE(buffer, callRetranslate, size); + char *callSite = buffer; + COPY_TEMPLATE(buffer, setCounter, size); + char *counterSetter = buffer; + COPY_TEMPLATE(buffer, jmpToBody, size); + char *callRecompileBlockJump = buffer; + + // bumpCounter + char *counterLocation = buffer; + COPY_IMM4_TEMPLATE(buffer, size, initialCount); + + uintptr_t jitRetranslateCallerWithPrepHelperAddress = getHelperOrTrampolineAddress(TR_jitRetranslateCallerWithPrep, (uintptr_t)buffer); + + // Find the countForRecompile's address and patch for the guard block. 
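+   // Everything below is back-patching of the templates copied above:
+   // the retranslate arguments receive the J9Method and startPC as 8-byte
+   // immediates, the call site receives a 32-bit displacement to the
+   // retranslate helper (or its trampoline), the counter load/decrement/set
+   // instructions receive the address of the counter emitted above, and the
+   // two block-exit jumps receive 32-bit displacements to the current buffer
+   // position, i.e. to whatever is emitted immediately after this snippet.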
+ patchImm8(retranslateArg1Patch, (uintptr_t)(method)); + patchImm8(retranslateArg2Patch, (uintptr_t)(startPC)); + // patchImm8(guardBlock, (uintptr_t)(&(_persistentInfo->_countForRecompile))); + PATCH_RELATIVE_ADDR_32_BIT(callSite, jitRetranslateCallerWithPrepHelperAddress); + patchImm8(counterLoader, (uintptr_t)counterLocation); + patchImm8(counterStore, (uintptr_t)counterLocation); + patchImm8(counterSetter, (uintptr_t)counterLocation); + // PATCH_RELATIVE_ADDR_32_BIT(guardBlockJump, buffer); + PATCH_RELATIVE_ADDR_32_BIT(bumpCounterBlockJump, buffer); + PATCH_RELATIVE_ADDR_32_BIT(callRecompileBlockJump, buffer); + + return size; + } + +buffer_size_t +MJIT::CodeGenerator::saveArgsInLocalArray( + char *buffer, + buffer_size_t stack_alloc_space, + char *typeString) + { + buffer_size_t saveSize = 0; + U_16 slot = 0; + TR::RealRegister::RegNum regNum = TR::RealRegister::NoReg; + char typeChar; + int index = 0; + ParamTableEntry entry; + while (index<_paramTable->getParamCount()) + { + MJIT_ASSERT(_logFileFP, _paramTable->getEntry(index, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + index++; + continue; + } + if (entry.onStack) + break; + int32_t regNum = entry.regNo; + switch (regNum) + { + case TR::RealRegister::xmm0: + COPY_TEMPLATE(buffer, saveXMM0Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm1: + COPY_TEMPLATE(buffer, saveXMM1Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm2: + COPY_TEMPLATE(buffer, saveXMM2Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm3: + COPY_TEMPLATE(buffer, saveXMM3Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm4: + COPY_TEMPLATE(buffer, saveXMM4Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm5: + COPY_TEMPLATE(buffer, saveXMM5Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm6: + COPY_TEMPLATE(buffer, saveXMM6Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::xmm7: + COPY_TEMPLATE(buffer, saveXMM7Local, saveSize); + goto PatchAndBreak; + case TR::RealRegister::eax: + COPY_TEMPLATE(buffer, saveRAXLocal, saveSize); + goto PatchAndBreak; + case TR::RealRegister::esi: + COPY_TEMPLATE(buffer, saveRSILocal, saveSize); + goto PatchAndBreak; + case TR::RealRegister::edx: + COPY_TEMPLATE(buffer, saveRDXLocal, saveSize); + goto PatchAndBreak; + case TR::RealRegister::ecx: + COPY_TEMPLATE(buffer, saveRCXLocal, saveSize); + goto PatchAndBreak; + PatchAndBreak: + patchImm4(buffer, (U_32)((slot*8) & BIT_MASK_32)); + break; + case TR::RealRegister::NoReg: + break; + } + slot += entry.slots; + index += entry.slots; + } + + while (index<_paramTable->getParamCount()) + { + MJIT_ASSERT(_logFileFP, _paramTable->getEntry(index, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + index++; + continue; + } + uintptr_t offset = entry.offset; + // Our linkage templates for loading from the stack assume that the offset will always be less than 0xff. + // This is however not always true for valid code. If this becomes a problem in the future we will + // have to add support for larger offsets by creating new templates that use 2 byte offsets from rsp. + // Whether that will be a change to the current templates, or new templates with supporting code + // is a decision yet to be determined. 
+ if ((offset + stack_alloc_space) < uintptr_t(0xff)) + { + if (_comp->getOption(TR_TraceCG)) + trfprintf(_logFileFP, "Offset too large, add support for larger offsets"); + return -1; + } + COPY_TEMPLATE(buffer, movRSPOffsetR11, saveSize); + patchImm1(buffer, (U_8)(offset+stack_alloc_space)); + COPY_TEMPLATE(buffer, saveR11Local, saveSize); + patchImm4(buffer, (U_32)((slot*8) & BIT_MASK_32)); + slot += entry.slots; + index += entry.slots; + } + return saveSize; + } + +buffer_size_t +MJIT::CodeGenerator::saveArgsOnStack( + char *buffer, + buffer_size_t stack_alloc_space, + ParamTable *paramTable) + { + buffer_size_t saveArgsSize = 0; + U_16 offset = paramTable->getTotalParamSize(); + TR::RealRegister::RegNum regNum = TR::RealRegister::NoReg; + ParamTableEntry entry; + for (int i=0; igetParamCount();) + { + MJIT_ASSERT(_logFileFP, paramTable->getEntry(i, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + i++; + continue; + } + if (entry.onStack) + break; + int32_t regNum = entry.regNo; + if (i != 0) + offset -= entry.size; + switch (regNum) + { + case TR::RealRegister::xmm0: + COPY_TEMPLATE(buffer, saveXMM0Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm1: + COPY_TEMPLATE(buffer, saveXMM1Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm2: + COPY_TEMPLATE(buffer, saveXMM2Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm3: + COPY_TEMPLATE(buffer, saveXMM3Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm4: + COPY_TEMPLATE(buffer, saveXMM4Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm5: + COPY_TEMPLATE(buffer, saveXMM5Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm6: + COPY_TEMPLATE(buffer, saveXMM6Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm7: + COPY_TEMPLATE(buffer, saveXMM7Offset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::eax: + COPY_TEMPLATE(buffer, saveRAXOffset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::esi: + COPY_TEMPLATE(buffer, saveRSIOffset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::edx: + COPY_TEMPLATE(buffer, saveRDXOffset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::ecx: + COPY_TEMPLATE(buffer, saveRCXOffset, saveArgsSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::NoReg: + break; + } + i += entry.slots; + } + return saveArgsSize; + } + +buffer_size_t +MJIT::CodeGenerator::loadArgsFromStack( + char *buffer, + buffer_size_t stack_alloc_space, + ParamTable *paramTable) + { + U_16 offset = paramTable->getTotalParamSize(); + buffer_size_t argLoadSize = 0; + TR::RealRegister::RegNum regNum = TR::RealRegister::NoReg; + ParamTableEntry entry; + for (int i=0; igetParamCount();) + { + MJIT_ASSERT(_logFileFP, 
paramTable->getEntry(i, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + i++; + continue; + } + if (entry.onStack) + break; + int32_t regNum = entry.regNo; + if (i != 0) + offset -= entry.size; + switch(regNum) + { + case TR::RealRegister::xmm0: + COPY_TEMPLATE(buffer, loadXMM0Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm1: + COPY_TEMPLATE(buffer, loadXMM1Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm2: + COPY_TEMPLATE(buffer, loadXMM2Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm3: + COPY_TEMPLATE(buffer, loadXMM3Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm4: + COPY_TEMPLATE(buffer, loadXMM4Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm5: + COPY_TEMPLATE(buffer, loadXMM5Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm6: + COPY_TEMPLATE(buffer, loadXMM6Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::xmm7: + COPY_TEMPLATE(buffer, loadXMM7Offset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::eax: + COPY_TEMPLATE(buffer, loadRAXOffset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::esi: + COPY_TEMPLATE(buffer, loadRSIOffset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::edx: + COPY_TEMPLATE(buffer, loadRDXOffset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::ecx: + COPY_TEMPLATE(buffer, loadRCXOffset, argLoadSize); + patchImm4(buffer, (U_32)((offset+ stack_alloc_space) & 0xffffffff)); + break; + case TR::RealRegister::NoReg: + break; + } + i += entry.slots; + } + return argLoadSize; + } + +MJIT::CodeGenerator::CodeGenerator( + struct J9JITConfig *config, + J9VMThread *thread, + TR::FilePointer *fp, + TR_J9VMBase& vm, + ParamTable *paramTable, + TR::Compilation *comp, + MJIT::CodeGenGC *mjitCGGC, + TR::PersistentInfo *persistentInfo, + TR_Memory *trMemory, + TR_ResolvedMethod *compilee) + :_linkage(Linkage(comp->cg())) + ,_logFileFP(fp) + ,_vm(vm) + ,_codeCache(NULL) + ,_stackPeakSize(0) + ,_paramTable(paramTable) + ,_comp(comp) + ,_mjitCGGC(mjitCGGC) + ,_atlas(NULL) + ,_persistentInfo(persistentInfo) + ,_trMemory(trMemory) + ,_compilee(compilee) + ,_maxCalleeArgsSize(0) + { + _linkage._properties.setOutgoingArgAlignment(lcm(16, _vm.getLocalObjectAlignmentInBytes())); + } + +TR::GCStackAtlas * +MJIT::CodeGenerator::getStackAtlas() + { + return _atlas; + } + +buffer_size_t +MJIT::CodeGenerator::generateSwitchToInterpPrePrologue( + char *buffer, + J9Method *method, + buffer_size_t boundary, + buffer_size_t margin, + char *typeString, + U_16 maxLength) + { + uintptr_t startOfMethod = (uintptr_t)buffer; + buffer_size_t switchSize = 0; + + // Store the J9Method address in edi and store the label of this jitted method. 
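+   // The stub emitted here loads the J9Method into RDI, stores the register
+   // arguments out to the stack with saveArgsOnStack (no frame has been
+   // allocated yet, hence the zero offset), and jumps to the j2iTransition
+   // helper, going through a trampoline when the helper is outside 32-bit
+   // range. It then pads with nops until the alignment request (including
+   // the two-byte jump that follows) is satisfied and finishes with a
+   // one-byte-displacement jump back to the start of the stub.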
+ uintptr_t j2iTransitionPtr = getHelperOrTrampolineAddress(TR_j2iTransition, (uintptr_t)buffer); + uintptr_t methodPtr = (uintptr_t)method; + COPY_TEMPLATE(buffer, movRDIImm64, switchSize); + patchImm8(buffer, methodPtr); + + // if (comp->getOption(TR_EnableHCR)) + // comp->getStaticHCRPICSites()->push_front(prev); + + // Pass args on stack, expects the method to run in RDI + buffer_size_t saveArgSize = saveArgsOnStack(buffer, 0, _paramTable); + buffer += saveArgSize; + switchSize += saveArgSize; + + // Generate jump to the TR_i2jTransition function + COPY_TEMPLATE(buffer, jump4ByteRel, switchSize); + PATCH_RELATIVE_ADDR_32_BIT(buffer, j2iTransitionPtr); + + margin += 2; + uintptr_t requiredAlignment = 0; + if (getRequiredAlignment((uintptr_t)buffer, boundary, margin, &requiredAlignment)) + { + buffer = (char*)startOfMethod; + return 0; + } + for (U_8 i = 0; i<=requiredAlignment; i++) + COPY_TEMPLATE(buffer, nopInstruction, switchSize); + + // Generate relative jump to start + COPY_TEMPLATE(buffer, jumpByteRel, switchSize); + PATCH_RELATIVE_ADDR_8_BIT(buffer, startOfMethod); + + return switchSize; + } + +buffer_size_t +MJIT::CodeGenerator::generatePrePrologue( + char *buffer, + J9Method *method, + char** magicWordLocation, + char** first2BytesPatchLocation, + char** samplingRecompileCallLocation) + { + // Return size (in bytes) of pre-prologue on success + + buffer_size_t preprologueSize = 0; + int alignmentMargin = 0; + int alignmentBoundary = 8; + uintptr_t endOfPreviousMethod = (uintptr_t)buffer; + + J9ROMClass *romClass = J9_CLASS_FROM_METHOD(method)->romClass; + J9ROMMethod *romMethod = J9_ROM_METHOD_FROM_RAM_METHOD(method); + U_16 maxLength = J9UTF8_LENGTH(J9ROMMETHOD_SIGNATURE(romMethod)); + char typeString[maxLength]; + if (nativeSignature(method, typeString) // If this had no signature, we can't compile it. + || !_comp->getRecompilationInfo()) // If this has no recompilation info we won't be able to recompile, so let TR compile it later. + { + return 0; + } + + // Save area for the first two bytes of the method + jitted body info pointer + linkageinfo in margin. + alignmentMargin += MJIT_SAVE_AREA_SIZE + MJIT_JITTED_BODY_INFO_PTR_SIZE + MJIT_LINKAGE_INFO_SIZE; + + // Make sure the startPC at least 4-byte aligned. This is important, since the VM + // depends on the alignment (it uses the low order bits as tag bits). 
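+   // The margin accumulated above (save area, jitted body info pointer and
+   // linkage info) is the data still to be emitted between the alignment
+   // point and the startPC, so aligning buffer+margin aligns the startPC
+   // itself rather than the current buffer position.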
+ // + // TODO: MicroJIT: `mustGenerateSwitchToInterpreterPrePrologue` checks that the code generator's compilation is not: + // `usesPreexistence`, + // `getOption(TR_EnableHCR)`, + // `!fej9()->isAsyncCompilation`, + // `getOption(TR_FullSpeedDebug)`, + // + if (GENERATE_SWITCH_TO_INTERP_PREPROLOGUE) + { + // TODO: MicroJIT: Replace with a check that performs the above checks + // generateSwitchToInterpPrePrologue will align data for use + char *old_buff = buffer; + preprologueSize += generateSwitchToInterpPrePrologue(buffer, method, alignmentBoundary, alignmentMargin, typeString, maxLength); + buffer += preprologueSize; +#ifdef MJIT_DEBUG + trfprintf(_logFileFP, "\ngenerateSwitchToInterpPrePrologue: %d\n", preprologueSize); + for (int32_t i = 0; i < preprologueSize; i++) + trfprintf(_logFileFP, "%02x\n", ((unsigned char)old_buff[i]) & (unsigned char)0xff); +#endif + } + else + { + uintptr_t requiredAlignment = 0; + if (getRequiredAlignment((uintptr_t)buffer, alignmentBoundary, alignmentMargin, &requiredAlignment)) + { + buffer = (char*)endOfPreviousMethod; + return 0; + } + for (U_8 i = 0; igetRecompilationInfo()->getJittedBodyInfo(); + bodyInfo->setUsesGCR(); + bodyInfo->setIsMJITCompiledMethod(true); + COPY_IMM8_TEMPLATE(buffer, preprologueSize, bodyInfo); + + // Allow 4 bytes for private linkage return type info. Allocate the 4 bytes + // even if the linkage is not private, so that all the offsets are + // predictable. + uint32_t magicWord = 0x00000010; // All MicroJIT compilations are set to recompile with GCR. + switch (typeString[0]) + { + case MJIT::VOID_TYPE_CHARACTER: + COPY_IMM4_TEMPLATE(buffer, preprologueSize, magicWord | TR_VoidReturn); + break; + case MJIT::BYTE_TYPE_CHARACTER: + case MJIT::SHORT_TYPE_CHARACTER: + case MJIT::CHAR_TYPE_CHARACTER: + case MJIT::BOOLEAN_TYPE_CHARACTER: + case MJIT::INT_TYPE_CHARACTER: + COPY_IMM4_TEMPLATE(buffer, preprologueSize, magicWord | TR_IntReturn); + break; + case MJIT::LONG_TYPE_CHARACTER: + COPY_IMM4_TEMPLATE(buffer, preprologueSize, magicWord | TR_LongReturn); + break; + case MJIT::CLASSNAME_TYPE_CHARACTER: + COPY_IMM4_TEMPLATE(buffer, preprologueSize, magicWord | TR_ObjectReturn); + break; + case MJIT::FLOAT_TYPE_CHARACTER: + COPY_IMM4_TEMPLATE(buffer, preprologueSize, magicWord | TR_FloatXMMReturn); + break; + case MJIT::DOUBLE_TYPE_CHARACTER: + COPY_IMM4_TEMPLATE(buffer, preprologueSize, magicWord | TR_DoubleXMMReturn); + break; + } + *magicWordLocation = buffer; + return preprologueSize; + } + +/* + * Write the prologue to the buffer and return the size written to the buffer. 
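+ *
+ * The prologue first re-loads the argument registers from the same stack
+ * slots the pre-prologue's saveArgsOnStack uses, stores the size of that
+ * re-load sequence in the upper bits of the linkage magic word, and emits
+ * the stack-overflow check, saving the first two bytes of that instruction
+ * through first2BytesPatchLocation. It then subtracts _stackPeakSize from
+ * RSP to allocate the frame, saves the preserved registers and the incoming
+ * arguments, sets R10 (and copies it into R14) to the start of the local
+ * variable array, above the register save area and the MJIT computation
+ * stack, and finally copies the parameters into their local array slots
+ * with saveArgsInLocalArray.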
+ */ +buffer_size_t +MJIT::CodeGenerator::generatePrologue( + char *buffer, + J9Method *method, + char **jitStackOverflowJumpPatchLocation, + char *magicWordLocation, + char *first2BytesPatchLocation, + char *samplingRecompileCallLocation, + char **firstInstLocation, + TR_J9ByteCodeIterator *bci) + { + buffer_size_t prologueSize = 0; + uintptr_t prologueStart = (uintptr_t)buffer; + + J9ROMClass *romClass = J9_CLASS_FROM_METHOD(method)->romClass; + J9ROMMethod *romMethod = J9_ROM_METHOD_FROM_RAM_METHOD(method); + U_16 maxLength = J9UTF8_LENGTH(J9ROMMETHOD_SIGNATURE(romMethod)); + char typeString[maxLength]; + if (nativeSignature(method, typeString)) + return 0; + uintptr_t startPC = (uintptr_t)buffer; + // Same order as saving (See generateSwitchToInterpPrePrologue) + buffer_size_t loadArgSize = loadArgsFromStack(buffer, 0, _paramTable); + buffer += loadArgSize; + prologueSize += loadArgSize; + + uint32_t magicWord = *(magicWordLocation-4); // We save the end of the location for patching, not the start. + magicWord |= (((uintptr_t)buffer) - prologueStart) << 16; + patchImm4(magicWordLocation, magicWord); + + // check for stack overflow + *firstInstLocation = buffer; + COPY_TEMPLATE(buffer, cmpRspRbpDerefOffset, prologueSize); + patchImm4(buffer, (U_32)(0x50)); + patchImm2(first2BytesPatchLocation, *(uint16_t*)(*firstInstLocation)); + COPY_TEMPLATE(buffer, jbe4ByteRel, prologueSize); + *jitStackOverflowJumpPatchLocation = buffer; + + // Compute frame size + // + // allocSize: bytes to be subtracted from the stack pointer when allocating the frame + // peakSize: maximum bytes of stack this method might consume before encountering another stack check + // + struct TR::X86LinkageProperties properties = _linkage._properties; + // Max amount of data to be preserved. It is overkill, but as long as the prologue/epilogue save/load everything + // it is a workable solution for now. Later try to determine what registers need to be preserved ahead of time. + const int32_t pointerSize = properties.getPointerSize(); + U_32 preservedRegsSize = properties._numPreservedRegisters * pointerSize; + const int32_t localSize = (romMethod->argCount + romMethod->tempCount)*pointerSize; + MJIT_ASSERT(_logFileFP, localSize >= 0, "assertion failure"); + int32_t frameSize = localSize + preservedRegsSize + ( properties.getReservesOutgoingArgsInPrologue() ? pointerSize : 0 ); + uint32_t stackSize = frameSize + properties.getRetAddressWidth(); + uint32_t adjust = align(stackSize, properties.getOutgoingArgAlignment(), _logFileFP) - stackSize; + auto allocSize = frameSize + adjust; + + const int32_t mjitRegisterSize = (6 * pointerSize); + + // return address is allocated by call instruction + // Here we conservatively assume there is a call + _stackPeakSize = + allocSize + // space for stack frame + mjitRegisterSize + // saved registers, conservatively expecting a call + _stackPeakSize; // space for mjit value stack (set in CompilationInfoPerThreadBase::mjit) + + // Small: entire stack usage fits in STACKCHECKBUFFER, so if sp is within + // the soft limit before buying the frame, then the whole frame will fit + // within the hard limit. + // + // Medium: the additional stack required after bumping the sp fits in + // STACKCHECKBUFFER, so if sp after the bump is within the soft limit, the + // whole frame will fit within the hard limit. + // + // Large: No shortcuts. Calculate the maximum extent of stack needed and + // compare that against the soft limit. 
(We have to use the soft limit here + // if for no other reason than that's the one used for asyncchecks.) + + COPY_TEMPLATE(buffer, subRSPImm4, prologueSize); + patchImm4(buffer, _stackPeakSize); + + buffer_size_t savePreserveSize = savePreserveRegisters(buffer, _stackPeakSize-8); + buffer += savePreserveSize; + prologueSize += savePreserveSize; + + buffer_size_t saveArgSize = saveArgsOnStack(buffer, _stackPeakSize, _paramTable); + buffer += saveArgSize; + prologueSize += saveArgSize; + + /* + * MJIT stack frames must conform to TR stack frame shapes. However, so long as we are able to have our frames walked, we should be + * free to allocate more things on the stack before the required data which is used for walking. The following is the general shape of the + * initial MJIT stack frame: + * +-----------------------------+ + * | ... | + * | Paramaters passed by caller | <-- The space for saving params passed in registers is also here + * | ... | + * +-----------------------------+ + * | Return Address | <-- RSP points here before we sub the _stackPeakSize + * +-----------------------------+ + * | Alignment space | + * +-----------------------------+ <-- Caller/Callee stack boundary + * | ... | + * | Callee Preserved Registers | + * | ... | + * +-----------------------------+ + * | ... | + * | Local Variables | + * | ... | <-- R10 points here at the end of the prologue, R14 always points here + * +-----------------------------+ + * | ... | + * | MJIT Computation Stack | + * | ... | + * +-----------------------------+ + * | ... | + * | Caller Preserved Registers | + * | ... | <-- RSP points here after we sub _stackPeakSize the first time + * +-----------------------------+ + * | ... | + * | Paramaters passed to callee | + * | ... | <-- RSP points here after we sub _stackPeakSize the second time + * +-----------------------------+ + * | Return Address | <-- RSP points here after a callq instruction + * +-----------------------------+ <-- End of stack frame + */ + + // Set up MJIT value stack + COPY_TEMPLATE(buffer, movRSPR10, prologueSize); + COPY_TEMPLATE(buffer, addR10Imm4, prologueSize); + patchImm4(buffer, (U_32)(mjitRegisterSize)); + COPY_TEMPLATE(buffer, addR10Imm4, prologueSize); + patchImm4(buffer, (U_32)(romMethod->maxStack*8)); + // TODO: MicroJIT: Find a way to cleanly preserve this value before overriding it in the _stackAllocSize + + // Set up MJIT local array + COPY_TEMPLATE(buffer, movR10R14, prologueSize); + + // Move parameters to where the method body will expect to find them + buffer_size_t saveSize = saveArgsInLocalArray(buffer, _stackPeakSize, typeString); + if ((int32_t)saveSize == -1) + return 0; + buffer += saveSize; + prologueSize += saveSize; + if (!saveSize && romMethod->argCount) + return 0; + + int32_t firstLocalOffset = (preservedRegsSize+8); + // This is the number of slots, not variables. + U_16 localVariableCount = romMethod->argCount + romMethod->tempCount; + MJIT::LocalTableEntry localTableEntries[localVariableCount]; + for (int i=0; icg()->setStackFramePaddingSizeInBytes(adjust); + _comp->cg()->setFrameSizeInBytes(_stackPeakSize); + + // The Parameters are now in the local array, so we update the entries in the ParamTable with their values from the LocalTable + ParamTableEntry entry; + for (int i=0; i<_paramTable->getParamCount(); i += ((entry.notInitialized) ? 
1 : entry.slots))
+      {
+      // The indexes should be the same in both tables
+      // because (as far as we know) the parameters
+      // are indexed the same way as the locals
+      localTable->getEntry(i, &entry);
+      _paramTable->setEntry(i, &entry);
+      }
+
+   TR::GCStackAtlas *atlas = _mjitCGGC->createStackAtlas(_comp, _paramTable, localTable);
+   if (!atlas)
+      return 0;
+   _atlas = atlas;
+
+   // Support to paint allocated frame slots.
+   if (( _comp->getOption(TR_PaintAllocatedFrameSlotsDead) || _comp->getOption(TR_PaintAllocatedFrameSlotsFauxObject) ) && allocSize!=0)
+      {
+      uint64_t paintValue = 0;
+
+      // Paint the slots with deadf00d
+      if (_comp->getOption(TR_PaintAllocatedFrameSlotsDead))
+         paintValue = (uint64_t)CONSTANT64(0xdeadf00ddeadf00d);
+      else // Paint stack slots with an arbitrary object-aligned address.
+         paintValue = ((uintptr_t) ((uintptr_t)_comp->getOptions()->getHeapBase() + (uintptr_t) 4096));
+
+      COPY_TEMPLATE(buffer, paintRegister, prologueSize);
+      patchImm8(buffer, paintValue);
+      for (int32_t i=_paramTable->getParamCount(); i<localTable->getLocalCount();)
+         {
+         if (entry.notInitialized)
+            {
+            i++;
+            continue;
+            }
+         localTable->getEntry(i, &entry);
+         COPY_TEMPLATE(buffer, paintLocal, prologueSize);
+         patchImm4(buffer, i * pointerSize);
+         i += entry.slots;
+         }
+      }
+   uint32_t count = J9ROMMETHOD_HAS_BACKWARDS_BRANCHES(romMethod) ? TR_DEFAULT_INITIAL_BCOUNT : TR_DEFAULT_INITIAL_COUNT;
+   prologueSize += generateGCR(buffer, count, method, startPC);
+
+   return prologueSize;
+   }
+
+buffer_size_t
+MJIT::CodeGenerator::generateEpologue(char *buffer)
+   {
+   buffer_size_t epologueSize = loadPreserveRegisters(buffer, _stackPeakSize-8);
+   buffer += epologueSize;
+   COPY_TEMPLATE(buffer, addRSPImm4, epologueSize);
+   patchImm4(buffer, _stackPeakSize - _maxCalleeArgsSize);
+   return epologueSize;
+   }
+
+// Macros to clean up the switch case for generate body
+#define loadCasePrologue(byteCodeCaseType) \
+   case TR_J9ByteCode::byteCodeCaseType
+#define loadCaseBody(byteCodeCaseType) \
+   goto GenericLoadCall
+#define storeCasePrologue(byteCodeCaseType) \
+   case TR_J9ByteCode::byteCodeCaseType
+#define storeCaseBody(byteCodeCaseType) \
+   goto GenericStoreCall
+
+// used for each conditional and jump except gotow (wide)
+#define branchByteCode(byteCode, template) \
+   case byteCode: \
+      { \
+      _targetIndex = bci->next2BytesSigned() + bci->currentByteCodeIndex(); \
+      COPY_TEMPLATE(buffer, template, codeGenSize); \
+      MJIT::JumpTableEntry _entry(_targetIndex, buffer); \
+      _jumpTable[_targetCounter] = _entry; \
+      _targetCounter++; \
+      break; \
+      }
+
+buffer_size_t
+MJIT::CodeGenerator::generateBody(char *buffer, TR_J9ByteCodeIterator *bci)
+   {
+   buffer_size_t codeGenSize = 0;
+   buffer_size_t calledCGSize = 0;
+   int32_t _currentByteCodeIndex = 0;
+   int32_t _targetIndex = 0;
+   int32_t _targetCounter = 0;
+
+#ifdef MJIT_DEBUG_BC_WALKING
+   std::string _bcMnemonic, _bcText;
+   u_int8_t _bcOpcode;
+#endif
+
+   uintptr_t SSEfloatRemainder;
+   uintptr_t SSEdoubleRemainder;
+
+   // Map from bytecode index to the generated JIT code address
+   char *_byteCodeIndexToJitTable[bci->maxByteCodeIndex()];
+
+   // Each time we encounter a branch or goto, add an entry. After codegen
+   // completes, use these entries to patch the branch-target addresses
+   // using _byteCodeIndexToJitTable.
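+   // In other words, branch targets are resolved in two passes: the bytecode switch
+   // below emits each branch template and records a (target bytecode index, patch
+   // address) pair, and the loop after the switch rewrites every recorded site once
+   // the generated address of each target bytecode is known.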
+ MJIT::JumpTableEntry _jumpTable[bci->maxByteCodeIndex()]; + +#ifdef MJIT_DEBUG_BC_WALKING + MJIT_DEBUG_BC_LOG(_logFileFP, "\nMicroJIT Bytecode Dump:\n"); + bci->printByteCodes(); +#endif + + struct TR::X86LinkageProperties *properties = &_linkage._properties; + TR::RealRegister::RegNum returnRegIndex; + U_16 slotCountRem = 0; + bool isDouble = false; + for (TR_J9ByteCode bc = bci->first(); bc != J9BCunknown; bc = bci->next()) + { +#ifdef MJIT_DEBUG_BC_WALKING + // In cases of conjunction of two bytecodes, the _bcMnemonic needs to be replaced by correct bytecodes, + // i.e., JBaload0getfield needs to be replaced by JBaload0 + // and JBnewdup by JBnew + if (!strcmp(bci->currentMnemonic(), "JBaload0getfield")) + { + const char * newMnemonic = "JBaload0"; + _bcMnemonic = std::string(newMnemonic); + } + else if (!strcmp(bci->currentMnemonic(), "JBnewdup")) + { + const char * newMnemonic = "JBnew"; + _bcMnemonic = std::string(newMnemonic); + } + else + { + _bcMnemonic = std::string(bci->currentMnemonic()); + } + _bcOpcode = bci->currentOpcode(); + _bcText = _bcMnemonic + "\n"; + MJIT_DEBUG_BC_LOG(_logFileFP, _bcText.c_str()); +#endif + + _currentByteCodeIndex = bci->currentByteCodeIndex(); + + // record BCI to JIT address + _byteCodeIndexToJitTable[_currentByteCodeIndex] = buffer; + + switch (bc) + { + loadCasePrologue(J9BCiload0): + loadCaseBody(J9BCiload0); + loadCasePrologue(J9BCiload1): + loadCaseBody(J9BCiload1); + loadCasePrologue(J9BCiload2): + loadCaseBody(J9BCiload2); + loadCasePrologue(J9BCiload3): + loadCaseBody(J9BCiload3); + loadCasePrologue(J9BCiload): + loadCaseBody(J9BCiload); + loadCasePrologue(J9BClload0): + loadCaseBody(J9BClload0); + loadCasePrologue(J9BClload1): + loadCaseBody(J9BClload1); + loadCasePrologue(J9BClload2): + loadCaseBody(J9BClload2); + loadCasePrologue(J9BClload3): + loadCaseBody(J9BClload3); + loadCasePrologue(J9BClload): + loadCaseBody(J9BClload); + loadCasePrologue(J9BCfload0): + loadCaseBody(J9BCfload0); + loadCasePrologue(J9BCfload1): + loadCaseBody(J9BCfload1); + loadCasePrologue(J9BCfload2): + loadCaseBody(J9BCfload2); + loadCasePrologue(J9BCfload3): + loadCaseBody(J9BCfload3); + loadCasePrologue(J9BCfload): + loadCaseBody(J9BCfload); + loadCasePrologue(J9BCdload0): + loadCaseBody(J9BCdload0); + loadCasePrologue(J9BCdload1): + loadCaseBody(J9BCdload1); + loadCasePrologue(J9BCdload2): + loadCaseBody(J9BCdload2); + loadCasePrologue(J9BCdload3): + loadCaseBody(J9BCdload3); + loadCasePrologue(J9BCdload): + loadCaseBody(J9BCdload); + loadCasePrologue(J9BCaload0): + loadCaseBody(J9BCaload0); + loadCasePrologue(J9BCaload1): + loadCaseBody(J9BCaload1); + loadCasePrologue(J9BCaload2): + loadCaseBody(J9BCaload2); + loadCasePrologue(J9BCaload3): + loadCaseBody(J9BCaload3); + loadCasePrologue(J9BCaload): + loadCaseBody(J9BCaload); + GenericLoadCall: + if(calledCGSize = generateLoad(buffer, bc, bci)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported load bytecode - %s (%d)\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + break; + storeCasePrologue(J9BCistore0): + storeCaseBody(J9BCistore0); + storeCasePrologue(J9BCistore1): + storeCaseBody(J9BCistore1); + storeCasePrologue(J9BCistore2): + storeCaseBody(J9BCistore2); + storeCasePrologue(J9BCistore3): + storeCaseBody(J9BCistore3); + storeCasePrologue(J9BCistore): + storeCaseBody(J9BCistore); + storeCasePrologue(J9BClstore0): + storeCaseBody(J9BClstore0); + storeCasePrologue(J9BClstore1): + 
storeCaseBody(J9BClstore1); + storeCasePrologue(J9BClstore2): + storeCaseBody(J9BClstore2); + storeCasePrologue(J9BClstore3): + storeCaseBody(J9BClstore3); + storeCasePrologue(J9BClstore): + storeCaseBody(J9BClstore); + storeCasePrologue(J9BCfstore0): + storeCaseBody(J9BCfstore0); + storeCasePrologue(J9BCfstore1): + storeCaseBody(J9BCfstore1); + storeCasePrologue(J9BCfstore2): + storeCaseBody(J9BCfstore2); + storeCasePrologue(J9BCfstore3): + storeCaseBody(J9BCfstore3); + storeCasePrologue(J9BCfstore): + storeCaseBody(J9BCfstore); + storeCasePrologue(J9BCdstore0): + storeCaseBody(J9BCdstore0); + storeCasePrologue(J9BCdstore1): + storeCaseBody(J9BCdstore1); + storeCasePrologue(J9BCdstore2): + storeCaseBody(J9BCdstore2); + storeCasePrologue(J9BCdstore3): + storeCaseBody(J9BCdstore3); + storeCasePrologue(J9BCdstore): + storeCaseBody(J9BCdstore); + storeCasePrologue(J9BCastore0): + storeCaseBody(J9BCastore0); + storeCasePrologue(J9BCastore1): + storeCaseBody(J9BCastore1); + storeCasePrologue(J9BCastore2): + storeCaseBody(J9BCastore2); + storeCasePrologue(J9BCastore3): + storeCaseBody(J9BCastore3); + storeCasePrologue(J9BCastore): + storeCaseBody(J9BCastore); + GenericStoreCall: + if (calledCGSize = generateStore(buffer, bc, bci)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported store bytecode - %s (%d)\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + break; + branchByteCode(TR_J9ByteCode::J9BCgoto,gotoTemplate); + branchByteCode(TR_J9ByteCode::J9BCifne,ifneTemplate); + branchByteCode(TR_J9ByteCode::J9BCifeq,ifeqTemplate); + branchByteCode(TR_J9ByteCode::J9BCiflt,ifltTemplate); + branchByteCode(TR_J9ByteCode::J9BCifle,ifleTemplate); + branchByteCode(TR_J9ByteCode::J9BCifgt,ifgtTemplate); + branchByteCode(TR_J9ByteCode::J9BCifge,ifgeTemplate); + branchByteCode(TR_J9ByteCode::J9BCificmpeq,ificmpeqTemplate); + branchByteCode(TR_J9ByteCode::J9BCificmpne,ificmpneTemplate); + branchByteCode(TR_J9ByteCode::J9BCificmplt,ificmpltTemplate); + branchByteCode(TR_J9ByteCode::J9BCificmple,ificmpleTemplate); + branchByteCode(TR_J9ByteCode::J9BCificmpge,ificmpgeTemplate); + branchByteCode(TR_J9ByteCode::J9BCificmpgt,ificmpgtTemplate); + branchByteCode(TR_J9ByteCode::J9BCifacmpeq,ifacmpeqTemplate); + branchByteCode(TR_J9ByteCode::J9BCifacmpne,ifacmpneTemplate); + branchByteCode(TR_J9ByteCode::J9BCifnull,ifnullTemplate); + branchByteCode(TR_J9ByteCode::J9BCifnonnull,ifnonnullTemplate); + /* Commenting out as this is currently untested + case TR_J9ByteCode::J9BCgotow: + { + _targetIndex = bci->next4BytesSigned() + bci->currentByteCodeIndex(); // Copied because of 4 byte jump + COPY_TEMPLATE(buffer, gotoTemplate, codeGenSize); + MJIT::JumpTableEntry _entry(_targetIndex, buffer); + _jumpTable[_targetCounter] = _entry; + _targetCounter++; + break; + } + */ + case TR_J9ByteCode::J9BClcmp: + COPY_TEMPLATE(buffer, lcmpTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCfcmpl: + COPY_TEMPLATE(buffer, fcmplTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCfcmpg: + COPY_TEMPLATE(buffer, fcmpgTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdcmpl: + COPY_TEMPLATE(buffer, dcmplTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdcmpg: + COPY_TEMPLATE(buffer, dcmpgTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCpop: + COPY_TEMPLATE(buffer, popTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCpop2: + COPY_TEMPLATE(buffer, pop2Template, codeGenSize); + break; + case 
TR_J9ByteCode::J9BCswap: + COPY_TEMPLATE(buffer, swapTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdup: + COPY_TEMPLATE(buffer, dupTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdupx1: + COPY_TEMPLATE(buffer, dupx1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdupx2: + COPY_TEMPLATE(buffer, dupx2Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdup2: + COPY_TEMPLATE(buffer, dup2Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdup2x1: + COPY_TEMPLATE(buffer, dup2x1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdup2x2: + COPY_TEMPLATE(buffer, dup2x2Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCgetfield: + if (calledCGSize = generateGetField(buffer, bci)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported getField bytecode - %s (%d)\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + break; + case TR_J9ByteCode::J9BCputfield: + return 0; + if (calledCGSize = generatePutField(buffer, bci)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported putField bytecode - %s (%d)\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + break; + case TR_J9ByteCode::J9BCgetstatic: + if (calledCGSize = generateGetStatic(buffer, bci)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported getstatic bytecode - %s (%d)\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + break; + case TR_J9ByteCode::J9BCputstatic: + return 0; + if (calledCGSize = generatePutStatic(buffer, bci)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported putstatic bytecode - %s %d\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + break; + case TR_J9ByteCode::J9BCiadd: + COPY_TEMPLATE(buffer, iAddTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCisub: + COPY_TEMPLATE(buffer, iSubTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCimul: + COPY_TEMPLATE(buffer, iMulTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCidiv: + COPY_TEMPLATE(buffer, iDivTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCirem: + COPY_TEMPLATE(buffer, iRemTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCineg: + COPY_TEMPLATE(buffer, iNegTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCishl: + COPY_TEMPLATE(buffer, iShlTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCishr: + COPY_TEMPLATE(buffer, iShrTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCiushr: + COPY_TEMPLATE(buffer, iUshrTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCiand: + COPY_TEMPLATE(buffer, iAndTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCior: + COPY_TEMPLATE(buffer, iOrTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCixor: + COPY_TEMPLATE(buffer, iXorTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCi2l: + COPY_TEMPLATE(buffer, i2lTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCl2i: + COPY_TEMPLATE(buffer, l2iTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCi2b: + COPY_TEMPLATE(buffer, i2bTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCi2s: + COPY_TEMPLATE(buffer, i2sTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCi2c: + COPY_TEMPLATE(buffer, i2cTemplate, 
codeGenSize); + break; + case TR_J9ByteCode::J9BCi2d: + COPY_TEMPLATE(buffer, i2dTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCl2d: + COPY_TEMPLATE(buffer, l2dTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCd2i: + COPY_TEMPLATE(buffer, d2iTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCd2l: + COPY_TEMPLATE(buffer, d2lTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconstm1: + COPY_TEMPLATE(buffer, iconstm1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconst0: + COPY_TEMPLATE(buffer, iconst0Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconst1: + COPY_TEMPLATE(buffer, iconst1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconst2: + COPY_TEMPLATE(buffer, iconst2Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconst3: + COPY_TEMPLATE(buffer, iconst3Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconst4: + COPY_TEMPLATE(buffer, iconst4Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCiconst5: + COPY_TEMPLATE(buffer, iconst5Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCbipush: + COPY_TEMPLATE(buffer, bipushTemplate, codeGenSize); + patchImm4(buffer, (U_32)bci->nextByteSigned()); + break; + case TR_J9ByteCode::J9BCsipush: + COPY_TEMPLATE(buffer, sipushTemplatePrologue, codeGenSize); + patchImm2(buffer, (U_32)bci->next2Bytes()); + COPY_TEMPLATE(buffer, sipushTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCiinc: + { + uint8_t index = bci->nextByte(); + int8_t value = bci->nextByteSigned(2); + COPY_TEMPLATE(buffer, iIncTemplate_01_load, codeGenSize); + patchImm4(buffer, (U_32)(index*8)); + COPY_TEMPLATE(buffer, iIncTemplate_02_add, codeGenSize); + patchImm1(buffer, value); + COPY_TEMPLATE(buffer, iIncTemplate_03_store, codeGenSize); + patchImm4(buffer, (U_32)(index*8)); + break; + } + case TR_J9ByteCode::J9BCladd: + COPY_TEMPLATE(buffer, lAddTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClsub: + COPY_TEMPLATE(buffer, lSubTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClmul: + COPY_TEMPLATE(buffer, lMulTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCldiv: + COPY_TEMPLATE(buffer, lDivTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClrem: + COPY_TEMPLATE(buffer, lRemTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClneg: + COPY_TEMPLATE(buffer, lNegTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClshl: + COPY_TEMPLATE(buffer, lShlTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClshr: + COPY_TEMPLATE(buffer, lShrTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClushr: + COPY_TEMPLATE(buffer, lUshrTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCland: + COPY_TEMPLATE(buffer, lAndTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClor: + COPY_TEMPLATE(buffer, lOrTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClxor: + COPY_TEMPLATE(buffer, lXorTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BClconst0: + COPY_TEMPLATE(buffer, lconst0Template, codeGenSize); + break; + case TR_J9ByteCode::J9BClconst1: + COPY_TEMPLATE(buffer, lconst1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCfadd: + COPY_TEMPLATE(buffer, fAddTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCfsub: + COPY_TEMPLATE(buffer, fSubTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCfmul: + COPY_TEMPLATE(buffer, fMulTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCfdiv: + COPY_TEMPLATE(buffer, fDivTemplate, codeGenSize); + break; + case 
TR_J9ByteCode::J9BCfrem: + COPY_TEMPLATE(buffer, fRemTemplate, codeGenSize); + SSEfloatRemainder = getHelperOrTrampolineAddress(TR_AMD64floatRemainder, (uintptr_t)buffer); + PATCH_RELATIVE_ADDR_32_BIT(buffer, (intptr_t)SSEfloatRemainder); + slotCountRem = 1; + goto genRem; + case TR_J9ByteCode::J9BCdrem: + COPY_TEMPLATE(buffer, dRemTemplate, codeGenSize); + SSEdoubleRemainder = getHelperOrTrampolineAddress(TR_AMD64doubleRemainder, (uintptr_t)buffer); + PATCH_RELATIVE_ADDR_32_BIT(buffer, (intptr_t)SSEdoubleRemainder); + slotCountRem = 2; + isDouble = true; + goto genRem; + genRem: + returnRegIndex = properties->getFloatReturnRegister(); + COPY_TEMPLATE(buffer, retTemplate_sub, codeGenSize); + patchImm1(buffer, slotCountRem*8); + switch (returnRegIndex) + { + case TR::RealRegister::xmm0: + if (isDouble) + COPY_TEMPLATE(buffer, loadDxmm0Return, codeGenSize); + else + COPY_TEMPLATE(buffer, loadxmm0Return, codeGenSize); + break; + } + break; + case TR_J9ByteCode::J9BCfneg: + COPY_TEMPLATE(buffer, fNegTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCfconst0: + COPY_TEMPLATE(buffer, fconst0Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCfconst1: + COPY_TEMPLATE(buffer, fconst1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCfconst2: + COPY_TEMPLATE(buffer, fconst2Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdconst0: + COPY_TEMPLATE(buffer, dconst0Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdconst1: + COPY_TEMPLATE(buffer, dconst1Template, codeGenSize); + break; + case TR_J9ByteCode::J9BCdadd: + COPY_TEMPLATE(buffer, dAddTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdsub: + COPY_TEMPLATE(buffer, dSubTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdmul: + COPY_TEMPLATE(buffer, dMulTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCddiv: + COPY_TEMPLATE(buffer, dDivTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCdneg: + COPY_TEMPLATE(buffer, dNegTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCi2f: + COPY_TEMPLATE(buffer, i2fTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCf2i: + COPY_TEMPLATE(buffer, f2iTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCl2f: + COPY_TEMPLATE(buffer, l2fTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCf2l: + COPY_TEMPLATE(buffer, f2lTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCd2f: + COPY_TEMPLATE(buffer, d2fTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCf2d: + COPY_TEMPLATE(buffer, f2dTemplate, codeGenSize); + break; + case TR_J9ByteCode::J9BCReturnB: /* FALLTHROUGH */ + case TR_J9ByteCode::J9BCReturnC: /* FALLTHROUGH */ + case TR_J9ByteCode::J9BCReturnS: /* FALLTHROUGH */ + case TR_J9ByteCode::J9BCReturnZ: /* FALLTHROUGH */ + case TR_J9ByteCode::J9BCgenericReturn: + if (calledCGSize = generateReturn(buffer, _compilee->returnType(), properties)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else if (_compilee->returnType() == TR::Int64) + { + buffer_size_t epilogueSize = generateEpologue(buffer); + buffer += epilogueSize; + codeGenSize += epilogueSize; + COPY_TEMPLATE(buffer, eaxReturnTemplate, codeGenSize); + COPY_TEMPLATE(buffer, retTemplate_add, codeGenSize); + patchImm1(buffer, 8); + COPY_TEMPLATE(buffer, vReturnTemplate, codeGenSize); + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unknown Return type: %d\n", _compilee->returnType()); +#endif + return 0; + } + break; + case TR_J9ByteCode::J9BCinvokestatic: + if (calledCGSize = generateInvokeStatic(buffer, 
bci, properties)) + { + buffer += calledCGSize; + codeGenSize += calledCGSize; + } + else + { +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "Unsupported invokestatic bytecode %d\n", bc); +#endif + return 0; + } + break; + default: +#ifdef MJIT_DEBUG_BC_WALKING + trfprintf(_logFileFP, "no match for bytecode - %s (%d)\n", _bcMnemonic.c_str(), _bcOpcode); +#endif + return 0; + } + } + + // patch branches and goto addresses + for (int i = 0; i < _targetCounter; i++) + { + MJIT::JumpTableEntry e = _jumpTable[i]; + char *_target = _byteCodeIndexToJitTable[e.byteCodeIndex]; + PATCH_RELATIVE_ADDR_32_BIT(e.codeCacheAddress, (uintptr_t)_target); + } + + return codeGenSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateEdgeCounter(char *buffer, TR_J9ByteCodeIterator *bci) + { + buffer_size_t returnSize = 0; + // TODO: MicroJIT: Get the pointer to the profiling counter + uint32_t *profilingCounterLocation = NULL; + + COPY_TEMPLATE(buffer, loadCounter, returnSize); + patchImm8(buffer, (uint64_t)profilingCounterLocation); + COPY_TEMPLATE(buffer, incrementCounter, returnSize); + patchImm8(buffer, (uint64_t)profilingCounterLocation); + + return returnSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateReturn(char *buffer, TR::DataType dt, struct TR::X86LinkageProperties *properties) + { + buffer_size_t returnSize = 0; + buffer_size_t calledCGSize = 0; + TR::RealRegister::RegNum returnRegIndex; + U_16 slotCount = 0; + bool isAddressOrLong = false; + switch (dt) + { + case TR::Address: + slotCount = 1; + isAddressOrLong = true; + returnRegIndex = properties->getIntegerReturnRegister(); + goto genReturn; + case TR::Int64: + slotCount = 2; + isAddressOrLong = true; + returnRegIndex = properties->getIntegerReturnRegister(); + goto genReturn; + case TR::Int8: /* FALLTHROUGH */ + case TR::Int16: /* FALLTHROUGH */ + case TR::Int32: /* FALLTHROUGH */ + slotCount = 1; + returnRegIndex = properties->getIntegerReturnRegister(); + goto genReturn; + case TR::Float: + slotCount = 1; + returnRegIndex = properties->getFloatReturnRegister(); + goto genReturn; + case TR::Double: + slotCount = 2; + returnRegIndex = properties->getFloatReturnRegister(); + goto genReturn; + case TR::NoType: + returnRegIndex = TR::RealRegister::NoReg; + goto genReturn; + genReturn: + calledCGSize = generateEpologue(buffer); + buffer += calledCGSize; + returnSize += calledCGSize; + switch (returnRegIndex) + { + case TR::RealRegister::eax: + if (isAddressOrLong) + COPY_TEMPLATE(buffer, raxReturnTemplate, returnSize); + else + COPY_TEMPLATE(buffer, eaxReturnTemplate, returnSize); + goto genSlots; + case TR::RealRegister::xmm0: + COPY_TEMPLATE(buffer, xmm0ReturnTemplate, returnSize); + goto genSlots; + case TR::RealRegister::NoReg: + goto genSlots; + genSlots: + if (slotCount) + { + COPY_TEMPLATE(buffer, retTemplate_add, returnSize); + patchImm1(buffer, slotCount*8); + } + COPY_TEMPLATE(buffer, vReturnTemplate, returnSize); + break; + } + break; + default: + trfprintf(_logFileFP, "Argument type %s is not supported\n", dt.toString()); + break; + } + return returnSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateLoad(char *buffer, TR_J9ByteCode bc, TR_J9ByteCodeIterator *bci) + { + char *signature = _compilee->signatureChars(); + int index = -1; + buffer_size_t loadSize = 0; + switch (bc) + { + GenerateATemplate: + COPY_TEMPLATE(buffer, aloadTemplatePrologue, loadSize); + patchImm4(buffer, (U_32)(index*8)); + COPY_TEMPLATE(buffer, loadTemplate, loadSize); + break; + GenerateITemplate: + COPY_TEMPLATE(buffer, 
iloadTemplatePrologue, loadSize); + patchImm4(buffer, (U_32)(index*8)); + COPY_TEMPLATE(buffer, loadTemplate, loadSize); + break; + GenerateLTemplate: + COPY_TEMPLATE(buffer, lloadTemplatePrologue, loadSize); + patchImm4(buffer, (U_32)(index*8)); + COPY_TEMPLATE(buffer, loadTemplate, loadSize); + break; + case TR_J9ByteCode::J9BClload0: + case TR_J9ByteCode::J9BCdload0: + index = 0; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfload0: + case TR_J9ByteCode::J9BCiload0: + index = 0; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCaload0: + index = 0; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClload1: + case TR_J9ByteCode::J9BCdload1: + index = 1; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfload1: + case TR_J9ByteCode::J9BCiload1: + index = 1; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCaload1: + index = 1; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClload2: + case TR_J9ByteCode::J9BCdload2: + index = 2; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfload2: + case TR_J9ByteCode::J9BCiload2: + index = 2; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCaload2: + index = 2; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClload3: + case TR_J9ByteCode::J9BCdload3: + index = 3; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfload3: + case TR_J9ByteCode::J9BCiload3: + index = 3; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCaload3: + index = 3; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClload: + case TR_J9ByteCode::J9BCdload: + index = bci->nextByte(); + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfload: + case TR_J9ByteCode::J9BCiload: + index = bci->nextByte(); + goto GenerateITemplate; + case TR_J9ByteCode::J9BCaload: + index = bci->nextByte(); + goto GenerateATemplate; + default: + return 0; + } + return loadSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateStore(char *buffer, TR_J9ByteCode bc, TR_J9ByteCodeIterator *bci) + { + char *signature = _compilee->signatureChars(); + int index = -1; + buffer_size_t storeSize = 0; + switch (bc) + { + GenerateATemplate: + COPY_TEMPLATE(buffer, astoreTemplate, storeSize); + patchImm4(buffer, (U_32)(index*8)); + break; + GenerateLTemplate: + COPY_TEMPLATE(buffer, lstoreTemplate, storeSize); + patchImm4(buffer, (U_32)(index*8)); + break; + GenerateITemplate: + COPY_TEMPLATE(buffer, istoreTemplate, storeSize); + patchImm4(buffer, (U_32)(index*8)); + break; + case TR_J9ByteCode::J9BClstore0: + case TR_J9ByteCode::J9BCdstore0: + index = 0; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfstore0: + case TR_J9ByteCode::J9BCistore0: + index = 0; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCastore0: + index = 0; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClstore1: + case TR_J9ByteCode::J9BCdstore1: + index = 1; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfstore1: + case TR_J9ByteCode::J9BCistore1: + index = 1; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCastore1: + index = 1; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClstore2: + case TR_J9ByteCode::J9BCdstore2: + index = 2; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfstore2: + case TR_J9ByteCode::J9BCistore2: + index = 2; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCastore2: + index = 2; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClstore3: + case TR_J9ByteCode::J9BCdstore3: + index = 3; + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfstore3: + case TR_J9ByteCode::J9BCistore3: + index = 3; + goto GenerateITemplate; + case TR_J9ByteCode::J9BCastore3: + 
index = 3; + goto GenerateATemplate; + case TR_J9ByteCode::J9BClstore: + case TR_J9ByteCode::J9BCdstore: + index = bci->nextByte(); + goto GenerateLTemplate; + case TR_J9ByteCode::J9BCfstore: + case TR_J9ByteCode::J9BCistore: + index = bci->nextByte(); + goto GenerateITemplate; + case TR_J9ByteCode::J9BCastore: + index = bci->nextByte(); + goto GenerateATemplate; + default: + return 0; + } + return storeSize; + } + +// TODO: MicroJIT: see ::emitSnippitCode and redo this to use snippits +buffer_size_t +MJIT::CodeGenerator::generateArgumentMoveForStaticMethod( + char *buffer, + TR_ResolvedMethod *staticMethod, + char *typeString, + U_16 typeStringLength, + U_16 slotCount, + struct TR::X86LinkageProperties *properties) + { + buffer_size_t argumentMoveSize = 0; + + // Pull out parameters and find their place for calling + U_16 paramCount = MJIT::getParamCount(typeString, typeStringLength); + U_16 actualParamCount = MJIT::getActualParamCount(typeString, typeStringLength); + // TODO: MicroJIT: Implement passing arguments on stack when surpassing register count + if (actualParamCount > 4) + return 0; + + if (!paramCount) + return -1; + + MJIT::ParamTableEntry paramTableEntries[paramCount]; + for (int i=0; i 0) + { + if (paramTableEntries[lastIndex].notInitialized) + lastIndex--; + else + break; + } + bool isLongOrDouble; + bool isReference; + TR::RealRegister::RegNum argRegNum; + MJIT::ParamTableEntry entry; + while (actualParamCount > 0) + { + isLongOrDouble = false; + isReference = false; + argRegNum = TR::RealRegister::NoReg; + switch (actualParamCount-1) + { + case 0: /* FALLTHROUGH */ + case 1: /* FALLTHROUGH */ + case 2: /* FALLTHROUGH */ + case 3: + { + switch (typeString[(actualParamCount-1)+3]) + { + case MJIT::BYTE_TYPE_CHARACTER: + case MJIT::CHAR_TYPE_CHARACTER: + case MJIT::INT_TYPE_CHARACTER: + case MJIT::SHORT_TYPE_CHARACTER: + case MJIT::BOOLEAN_TYPE_CHARACTER: + argRegNum = properties->getIntegerArgumentRegister(actualParamCount-1); + goto genArg; + case MJIT::LONG_TYPE_CHARACTER: + isLongOrDouble = true; + argRegNum = properties->getIntegerArgumentRegister(actualParamCount-1); + goto genArg; + case MJIT::CLASSNAME_TYPE_CHARACTER: + isReference = true; + argRegNum = properties->getIntegerArgumentRegister(actualParamCount-1); + goto genArg; + case MJIT::DOUBLE_TYPE_CHARACTER: + isLongOrDouble = true; + argRegNum = properties->getFloatArgumentRegister(actualParamCount-1); + goto genArg; + case MJIT::FLOAT_TYPE_CHARACTER: + argRegNum = properties->getFloatArgumentRegister(actualParamCount-1); + goto genArg; + genArg: + switch (argRegNum) + { + case TR::RealRegister::eax: + if (isLongOrDouble) + COPY_TEMPLATE(buffer, moveraxForCall, argumentMoveSize); + else if (isReference) + COPY_TEMPLATE(buffer, moveraxRefForCall, argumentMoveSize); + else + COPY_TEMPLATE(buffer, moveeaxForCall, argumentMoveSize); + break; + case TR::RealRegister::esi: + if (isLongOrDouble) + COPY_TEMPLATE(buffer, moversiForCall, argumentMoveSize); + else if (isReference) + COPY_TEMPLATE(buffer, moversiRefForCall, argumentMoveSize); + else + COPY_TEMPLATE(buffer, moveesiForCall, argumentMoveSize); + break; + case TR::RealRegister::edx: + if (isLongOrDouble) + COPY_TEMPLATE(buffer, moverdxForCall, argumentMoveSize); + else if (isReference) + COPY_TEMPLATE(buffer, moverdxRefForCall, argumentMoveSize); + else + COPY_TEMPLATE(buffer, moveedxForCall, argumentMoveSize); + break; + case TR::RealRegister::ecx: + if (isLongOrDouble) + COPY_TEMPLATE(buffer, movercxForCall, argumentMoveSize); + else if (isReference) + 
COPY_TEMPLATE(buffer, movercxRefForCall, argumentMoveSize);
+                     else
+                        COPY_TEMPLATE(buffer, moveecxForCall, argumentMoveSize);
+                     break;
+                  case TR::RealRegister::xmm0:
+                     if (isLongOrDouble)
+                        COPY_TEMPLATE(buffer, moveDxmm0ForCall, argumentMoveSize);
+                     else
+                        COPY_TEMPLATE(buffer, movexmm0ForCall, argumentMoveSize);
+                     break;
+                  case TR::RealRegister::xmm1:
+                     if (isLongOrDouble)
+                        COPY_TEMPLATE(buffer, moveDxmm1ForCall, argumentMoveSize);
+                     else
+                        COPY_TEMPLATE(buffer, movexmm1ForCall, argumentMoveSize);
+                     break;
+                  case TR::RealRegister::xmm2:
+                     if (isLongOrDouble)
+                        COPY_TEMPLATE(buffer, moveDxmm2ForCall, argumentMoveSize);
+                     else
+                        COPY_TEMPLATE(buffer, movexmm2ForCall, argumentMoveSize);
+                     break;
+                  case TR::RealRegister::xmm3:
+                     if (isLongOrDouble)
+                        COPY_TEMPLATE(buffer, moveDxmm3ForCall, argumentMoveSize);
+                     else
+                        COPY_TEMPLATE(buffer, movexmm3ForCall, argumentMoveSize);
+                     break;
+                  }
+               break;
+               }
+            break;
+            }
+         default:
+            // TODO: MicroJIT: Add stack argument passing here
+            break;
+         }
+      actualParamCount--;
+      }
+
+   argumentMoveSize += saveArgsOnStack(buffer, -8, &paramTable);
+
+   return argumentMoveSize;
+   }
+
+buffer_size_t
+MJIT::CodeGenerator::generateInvokeStatic(char *buffer, TR_J9ByteCodeIterator *bci, struct TR::X86LinkageProperties *properties)
+   {
+   buffer_size_t invokeStaticSize = 0;
+   int32_t cpIndex = (int32_t)bci->next2Bytes();
+
+   // Attempt to resolve the address at compile time. This can fail, and if it does we must fail the compilation for now.
+   // In the future research project we could attempt runtime resolution. TR does this with self modifying code.
+   // These are all from TR and likely have implications on how we use this address. However, we do not know that as of yet.
+   bool isUnresolvedInCP;
+   TR_ResolvedMethod *resolved = _compilee->getResolvedStaticMethod(_comp, cpIndex, &isUnresolvedInCP);
+   // TODO: MicroJIT: Split this into generateInvokeStaticJava and generateInvokeStaticNative
+   // and call the correct one with required args. Do not turn this method into a mess.
+   if (!resolved || resolved->isNative())
+      return 0;
+
+   // Get method signature
+   J9Method *ramMethod = static_cast<TR_ResolvedJ9Method *>(resolved)->ramMethod();
+   J9ROMMethod *romMethod = J9_ROM_METHOD_FROM_RAM_METHOD(ramMethod);
+   U_16 maxLength = J9UTF8_LENGTH(J9ROMMETHOD_SIGNATURE(romMethod));
+   char typeString[maxLength];
+   if (MJIT::nativeSignature(ramMethod, typeString))
+      return 0;
+
+   // TODO: MicroJIT: Find the maximum size that will need to be saved
+   // for calling a method and reserve it in the prologue.
+   // This could be done during the meta-data gathering phase.
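+   // For example, for a static method taking an int and a long, the slot count below is
+   // expected to be 3 (1 + 2), assuming the usual JVM convention that long and double
+   // values occupy two stack slots and every other type occupies one.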
+ U_16 slotCount = MJIT::getMJITStackSlotCount(typeString, maxLength); + + // Generate template and instantiate immediate values + buffer_size_t argMoveSize = generateArgumentMoveForStaticMethod(buffer, resolved, typeString, maxLength, slotCount, properties); + + if (!argMoveSize) + return 0; + else if(-1 == argMoveSize) + argMoveSize = 0; + buffer += argMoveSize; + invokeStaticSize += argMoveSize; + + // Save Caller preserved registers + COPY_TEMPLATE(buffer, saveR10Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x0 + (slotCount * 8))); + COPY_TEMPLATE(buffer, saveR11Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x8 + (slotCount * 8))); + COPY_TEMPLATE(buffer, saveR12Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x10 + (slotCount * 8))); + COPY_TEMPLATE(buffer, saveR13Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x18 + (slotCount * 8))); + COPY_TEMPLATE(buffer, saveR14Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x20 + (slotCount * 8))); + COPY_TEMPLATE(buffer, saveR15Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x28 + (slotCount * 8))); + COPY_TEMPLATE(buffer, invokeStaticTemplate, invokeStaticSize); + patchImm8(buffer, (U_64)ramMethod); + + // Call the glue code + COPY_TEMPLATE(buffer, call4ByteRel, invokeStaticSize); + intptr_t interpreterStaticAndSpecialGlue = getHelperOrTrampolineAddress(TR_X86interpreterStaticAndSpecialGlue, (uintptr_t)buffer); + PATCH_RELATIVE_ADDR_32_BIT(buffer, interpreterStaticAndSpecialGlue); + + // Load Caller preserved registers + COPY_TEMPLATE(buffer, loadR10Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x00 + (slotCount * 8))); + COPY_TEMPLATE(buffer, loadR11Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x08 + (slotCount * 8))); + COPY_TEMPLATE(buffer, loadR12Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x10 + (slotCount * 8))); + COPY_TEMPLATE(buffer, loadR13Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x18 + (slotCount * 8))); + COPY_TEMPLATE(buffer, loadR14Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x20 + (slotCount * 8))); + COPY_TEMPLATE(buffer, loadR15Offset, invokeStaticSize); + patchImm4(buffer, (U_32)(0x28 + (slotCount * 8))); + TR::RealRegister::RegNum returnRegIndex; + slotCount = 0; + bool isAddressOrLongorDouble = false; + + switch (typeString[0]) + { + case MJIT::VOID_TYPE_CHARACTER: + break; + case MJIT::BOOLEAN_TYPE_CHARACTER: + case MJIT::BYTE_TYPE_CHARACTER: + case MJIT::CHAR_TYPE_CHARACTER: + case MJIT::SHORT_TYPE_CHARACTER: + case MJIT::INT_TYPE_CHARACTER: + slotCount = 1; + returnRegIndex = properties->getIntegerReturnRegister(); + goto genStatic; + case MJIT::CLASSNAME_TYPE_CHARACTER: + slotCount = 1; + isAddressOrLongorDouble = true; + returnRegIndex = properties->getIntegerReturnRegister(); + goto genStatic; + case MJIT::LONG_TYPE_CHARACTER: + slotCount = 2; + isAddressOrLongorDouble = true; + returnRegIndex = properties->getIntegerReturnRegister(); + goto genStatic; + case MJIT::FLOAT_TYPE_CHARACTER: + slotCount = 1; + returnRegIndex = properties->getFloatReturnRegister(); + goto genStatic; + case MJIT::DOUBLE_TYPE_CHARACTER: + slotCount = 2; + isAddressOrLongorDouble = true; + returnRegIndex = properties->getFloatReturnRegister(); + goto genStatic; + genStatic: + if (slotCount) + { + COPY_TEMPLATE(buffer, retTemplate_sub, invokeStaticSize); + patchImm1(buffer, slotCount*8); + } + switch (returnRegIndex) + { + case TR::RealRegister::eax: + if (isAddressOrLongorDouble) + COPY_TEMPLATE(buffer, loadraxReturn, invokeStaticSize); + else + 
COPY_TEMPLATE(buffer, loadeaxReturn, invokeStaticSize); + break; + case TR::RealRegister::xmm0: + if (isAddressOrLongorDouble) + COPY_TEMPLATE(buffer, loadDxmm0Return, invokeStaticSize); + else + COPY_TEMPLATE(buffer, loadxmm0Return, invokeStaticSize); + break; + } + break; + default: + MJIT_ASSERT(_logFileFP, false, "Bad return type."); + } + return invokeStaticSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateGetField(char *buffer, TR_J9ByteCodeIterator *bci) + { + buffer_size_t getFieldSize = 0; + int32_t cpIndex = (int32_t)bci->next2Bytes(); + + // Attempt to resolve the offset at compile time. This can fail, and if it does we must fail the compilation for now. + // In the future research project we could attempt runtime resolution. TR does this with self modifying code. + // These are all from TR and likely have implications on how we use this offset. However, we do not know that as of yet. + TR::DataType type = TR::NoType; + U_32 fieldOffset = 0; + bool isVolatile, isFinal, isPrivate, isUnresolvedInCP; + bool resolved = _compilee->fieldAttributes(_comp, cpIndex, &fieldOffset, &type, &isVolatile, &isFinal, &isPrivate, false, &isUnresolvedInCP); + if (!resolved) + return 0; + + switch (type) + { + case TR::Int8: + case TR::Int16: + case TR::Int32: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, getFieldTemplatePrologue, getFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, intGetFieldTemplate, getFieldSize); + break; + case TR::Address: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, getFieldTemplatePrologue, getFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, addrGetFieldTemplatePrologue, getFieldSize); + if (TR::Compiler->om.compressObjectReferences()) + { + int32_t shift = TR::Compiler->om.compressedReferenceShift(); + COPY_TEMPLATE(buffer, decompressReferenceTemplate, getFieldSize); + patchImm1(buffer, shift); + } + COPY_TEMPLATE(buffer, addrGetFieldTemplate, getFieldSize); + break; + case TR::Int64: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, getFieldTemplatePrologue, getFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, longGetFieldTemplate, getFieldSize); + break; + case TR::Float: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, getFieldTemplatePrologue, getFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, floatGetFieldTemplate, getFieldSize); + break; + case TR::Double: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, getFieldTemplatePrologue, getFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, doubleGetFieldTemplate, getFieldSize); + break; + default: + trfprintf(_logFileFP, "Argument type %s is not supported\n", type.toString()); + break; + } + if (_comp->getOption(TR_TraceCG)) + trfprintf(_logFileFP, "Field Offset: %u\n", fieldOffset); + + return getFieldSize; + } + +buffer_size_t +MJIT::CodeGenerator::generatePutField(char *buffer, TR_J9ByteCodeIterator *bci) + { + buffer_size_t putFieldSize = 0; + int32_t cpIndex = (int32_t)bci->next2Bytes(); + + // Attempt to resolve the address at compile time. This can fail, and if it does we must fail the compilation for now. + // In the future research project we could attempt runtime resolution. TR does this with self modifying code. + // These are all from TR and likely have implications on how we use this address. 
However, we do not know that as of yet. + U_32 fieldOffset = 0; + TR::DataType type = TR::NoType; + bool isVolatile, isFinal, isPrivate, isUnresolvedInCP; + bool resolved = _compilee->fieldAttributes(_comp, cpIndex, &fieldOffset, &type, &isVolatile, &isFinal, &isPrivate, true, &isUnresolvedInCP); + + if (!resolved) + return 0; + + switch (type) + { + case TR::Int8: + case TR::Int16: + case TR::Int32: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, intPutFieldTemplatePrologue, putFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, intPutFieldTemplate, putFieldSize); + break; + case TR::Address: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, addrPutFieldTemplatePrologue, putFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + if (TR::Compiler->om.compressObjectReferences()) + { + int32_t shift = TR::Compiler->om.compressedReferenceShift(); + COPY_TEMPLATE(buffer, compressReferenceTemplate, putFieldSize); + patchImm1(buffer, shift); + } + COPY_TEMPLATE(buffer, intPutFieldTemplate, putFieldSize); + break; + case TR::Int64: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, longPutFieldTemplatePrologue, putFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, longPutFieldTemplate, putFieldSize); + break; + case TR::Float: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, floatPutFieldTemplatePrologue, putFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, floatPutFieldTemplate, putFieldSize); + break; + case TR::Double: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, doublePutFieldTemplatePrologue, putFieldSize); + patchImm4(buffer, (U_32)(fieldOffset)); + COPY_TEMPLATE(buffer, doublePutFieldTemplate, putFieldSize); + break; + default: + trfprintf(_logFileFP, "Argument type %s is not supported\n", type.toString()); + break; + } + if (_comp->getOption(TR_TraceCG)) + trfprintf(_logFileFP, "Field Offset: %u\n", fieldOffset); + + return putFieldSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateGetStatic(char *buffer, TR_J9ByteCodeIterator *bci) + { + buffer_size_t getStaticSize = 0; + int32_t cpIndex = (int32_t)bci->next2Bytes(); + + // Attempt to resolve the address at compile time. This can fail, and if it does we must fail the compilation for now. + // In the future research project we could attempt runtime resolution. TR does this with self modifying code. + void *dataAddress; + //These are all from TR and likely have implications on how we use this address. However, we do not know that as of yet. 
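+   // Note that the resolved dataAddress is patched into the templates below as a 4-byte
+   // immediate (patchImm4), which is why every case asserts that the address lies below
+   // the 4GB boundary.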
+ TR::DataType type = TR::NoType; + bool isVolatile, isFinal, isPrivate, isUnresolvedInCP; + bool resolved = _compilee->staticAttributes(_comp, cpIndex, &dataAddress, &type, &isVolatile, &isFinal, &isPrivate, false, &isUnresolvedInCP); + if (!resolved) + return 0; + + switch (type) + { + case TR::Int8: + case TR::Int16: + case TR::Int32: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, getStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, intGetStaticTemplate, getStaticSize); + break; + case TR::Address: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, getStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, addrGetStaticTemplatePrologue, getStaticSize); + /* + if (TR::Compiler->om.compressObjectReferences()) + { + int32_t shift = TR::Compiler->om.compressedReferenceShift(); + COPY_TEMPLATE(buffer, decompressReferenceTemplate, getStaticSize); + patchImm1(buffer, shift); + } + */ + COPY_TEMPLATE(buffer, addrGetStaticTemplate, getStaticSize); + break; + case TR::Int64: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, getStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, longGetStaticTemplate, getStaticSize); + break; + case TR::Float: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, getStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, floatGetStaticTemplate, getStaticSize); + break; + case TR::Double: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, getStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, doubleGetStaticTemplate, getStaticSize); + break; + default: + trfprintf(_logFileFP, "Argument type %s is not supported\n", type.toString()); + break; + } + + return getStaticSize; + } + +buffer_size_t +MJIT::CodeGenerator::generatePutStatic(char *buffer, TR_J9ByteCodeIterator *bci) + { + buffer_size_t putStaticSize = 0; + int32_t cpIndex = (int32_t)bci->next2Bytes(); + + // Attempt to resolve the address at compile time. This can fail, and if it does we must fail the compilation for now. + // In the future research project we could attempt runtime resolution. TR does this with self modifying code. + void *dataAddress; + // These are all from TR and likely have implications on how we use this address. However, we do not know that as of yet. 
+ TR::DataType type = TR::NoType; + bool isVolatile, isFinal, isPrivate, isUnresolvedInCP; + bool resolved = _compilee->staticAttributes(_comp, cpIndex, &dataAddress, &type, &isVolatile, &isFinal, &isPrivate, true, &isUnresolvedInCP); + if (!resolved) + return 0; + + switch (type) + { + case TR::Int8: + case TR::Int16: + case TR::Int32: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, putStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, intPutStaticTemplate, putStaticSize); + break; + case TR::Address: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, putStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, addrPutStaticTemplatePrologue, putStaticSize); + /* + if (TR::Compiler->om.compressObjectReferences()) + { + int32_t shift = TR::Compiler->om.compressedReferenceShift(); + COPY_TEMPLATE(buffer, compressReferenceTemplate, putStaticSize); + patchImm1(buffer, shift); + } + */ + COPY_TEMPLATE(buffer, addrPutStaticTemplate, putStaticSize); + break; + case TR::Int64: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, putStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, longPutStaticTemplate, putStaticSize); + break; + case TR::Float: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, putStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, floatPutStaticTemplate, putStaticSize); + break; + case TR::Double: + // Generate template and instantiate immediate values + COPY_TEMPLATE(buffer, staticTemplatePrologue, putStaticSize); + MJIT_ASSERT(_logFileFP, (U_64)dataAddress <= 0x00000000ffffffff, "data is above 4-byte boundary"); + patchImm4(buffer, (U_32)((U_64)dataAddress & 0x00000000ffffffff)); + COPY_TEMPLATE(buffer, doublePutStaticTemplate, putStaticSize); + break; + default: + trfprintf(_logFileFP, "Argument type %s is not supported\n", type.toString()); + break; + } + + return putStaticSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateColdArea(char *buffer, J9Method *method, char *jitStackOverflowJumpPatchLocation) + { + buffer_size_t coldAreaSize = 0; + PATCH_RELATIVE_ADDR_32_BIT(jitStackOverflowJumpPatchLocation, (intptr_t)buffer); + COPY_TEMPLATE(buffer, movEDIImm32, coldAreaSize); + patchImm4(buffer, _stackPeakSize); + + COPY_TEMPLATE(buffer, call4ByteRel, coldAreaSize); + uintptr_t jitStackOverflowHelperAddress = getHelperOrTrampolineAddress(TR_stackOverflow, (uintptr_t)buffer); + PATCH_RELATIVE_ADDR_32_BIT(buffer, jitStackOverflowHelperAddress); + + COPY_TEMPLATE(buffer, jump4ByteRel, coldAreaSize); + PATCH_RELATIVE_ADDR_32_BIT(buffer, jitStackOverflowJumpPatchLocation); + + return coldAreaSize; + } + +buffer_size_t +MJIT::CodeGenerator::generateDebugBreakpoint(char *buffer) + { + buffer_size_t codeGenSize = 0; + COPY_TEMPLATE(buffer, debugBreakpoint, 
codeGenSize); + return codeGenSize; + } + +TR::CodeCache * +MJIT::CodeGenerator::getCodeCache() + { + return _codeCache; + } + +U_8 * +MJIT::CodeGenerator::allocateCodeCache(int32_t length, TR_J9VMBase *vmBase, J9VMThread *vmThread) + { + TR::CodeCacheManager *manager = TR::CodeCacheManager::instance(); + int32_t compThreadID = vmBase->getCompThreadIDForVMThread(vmThread); + int32_t numReserved; + + if (!_codeCache) + { + _codeCache = manager->reserveCodeCache(false, length, compThreadID, &numReserved); + if (!_codeCache) + return NULL; + _comp->cg()->setCodeCache(_codeCache); + } + + uint8_t *coldCode; + U_8 *codeStart = manager->allocateCodeMemory(length, 0, &_codeCache, &coldCode, false); + if (!codeStart) + return NULL; + + return codeStart; + } diff --git a/runtime/compiler/microjit/x/amd64/AMD64Codegen.hpp b/runtime/compiler/microjit/x/amd64/AMD64Codegen.hpp new file mode 100644 index 00000000000..cb07b272a82 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/AMD64Codegen.hpp @@ -0,0 +1,427 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#ifndef MJIT_AMD64CODEGEN_HPP +#define MJIT_AMD64CODEGEN_HPP + +#include +#include "microjit/SideTables.hpp" +#include "microjit/utils.hpp" +#include "microjit/x/amd64/AMD64Linkage.hpp" +#include "microjit/x/amd64/AMD64CodegenGC.hpp" +#include "control/RecompilationInfo.hpp" +#include "env/IO.hpp" +#include "env/TRMemory.hpp" +#include "ilgen/J9ByteCodeIterator.hpp" +#include "oti/j9generated.h" + +namespace TR { class Compilation; } +namespace TR { class CFG; } + +typedef unsigned int buffer_size_t; +namespace MJIT +{ + +enum TypeCharacters + { + BYTE_TYPE_CHARACTER = 'B', + CHAR_TYPE_CHARACTER = 'C', + DOUBLE_TYPE_CHARACTER = 'D', + FLOAT_TYPE_CHARACTER = 'F', + INT_TYPE_CHARACTER = 'I', + LONG_TYPE_CHARACTER = 'J', + CLASSNAME_TYPE_CHARACTER = 'L', + SHORT_TYPE_CHARACTER = 'S', + BOOLEAN_TYPE_CHARACTER = 'Z', + VOID_TYPE_CHARACTER = 'V', + }; + +inline static U_8 +typeSignatureSize(char typeBuffer) + { + switch (typeBuffer) + { + case MJIT::BYTE_TYPE_CHARACTER: + case MJIT::CHAR_TYPE_CHARACTER: + case MJIT::INT_TYPE_CHARACTER: + case MJIT::FLOAT_TYPE_CHARACTER: + case MJIT::CLASSNAME_TYPE_CHARACTER: + case MJIT::SHORT_TYPE_CHARACTER: + case MJIT::BOOLEAN_TYPE_CHARACTER: + return 8; + case MJIT::DOUBLE_TYPE_CHARACTER: + case MJIT::LONG_TYPE_CHARACTER: + return 16; + default: + return 0; + } + } + +RegisterStack +mapIncomingParams(char*, U_16, int*, ParamTableEntry*, U_16, TR::FilePointer*); + +bool +nativeSignature(J9Method *method, char *resultBuffer); + +int +getParamCount(char *typeString, U_16 maxLength); + +int +getActualParamCount(char *typeString, U_16 maxLength); + +int +getMJITStackSlotCount(char *typeString, U_16 maxLength); + +class CodeGenerator + { + + private: + Linkage _linkage; + TR::FilePointer *_logFileFP; + TR_J9VMBase& _vm; + TR::CodeCache *_codeCache; + int32_t _stackPeakSize; + ParamTable *_paramTable; + TR::Compilation *_comp; + MJIT::CodeGenGC *_mjitCGGC; + TR::GCStackAtlas *_atlas; + TR::PersistentInfo *_persistentInfo; + TR_Memory *_trMemory; + TR_ResolvedMethod *_compilee; + uint16_t _maxCalleeArgsSize; + bool _printByteCodes; + + buffer_size_t generateSwitchToInterpPrePrologue( + char*, + J9Method*, + buffer_size_t, + buffer_size_t, + char*, + U_16); + + buffer_size_t generateEpologue(char*); + + buffer_size_t loadArgsFromStack( + char*, + buffer_size_t, + ParamTable*); + + buffer_size_t saveArgsOnStack( + char*, + buffer_size_t, + ParamTable*); + + buffer_size_t + saveArgsInLocalArray( + char*, + buffer_size_t, + char*); + + /** + * Generates a load instruction based on the type. + * + * @param buffer code buffer + * @param bc Bytecode that generated the load instruction + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateLoad( + char *buffer, + TR_J9ByteCode bc, + TR_J9ByteCodeIterator *bci); + + /** + * Generates a store instruction based on the type. 
+ * + * @param buffer code buffer + * @param bc Bytecode that generated the load instruction + * @param bci Bytecode iterator from which the bytecode was retrieved + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateStore( + char *buffer, + TR_J9ByteCode bc, + TR_J9ByteCodeIterator *bci); + + /** + * Generates argument moves for static method invocation. + * + * @param buffer code buffer + * @param staticMethod method for introspection + * @param typeString typeString for getting arguments + * @param typeStringLength the length of the string stored in typeString + * @return size of generated code; 0 if method failed, -1 if method has no arguments + */ + buffer_size_t + generateArgumentMoveForStaticMethod( + char *buffer, + TR_ResolvedMethod *staticMethod, + char *typeString, + U_16 typeStringLength, + U_16 slotCount, + struct TR::X86LinkageProperties *properties); + + /** + * Generates an invoke static instruction based on the type. + * + * @param buffer code buffer + * @param bci Bytecode iterator from which the bytecode was retrieved + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateInvokeStatic( + char *buffer, + TR_J9ByteCodeIterator *bci, + struct TR::X86LinkageProperties *properties); + + /** + * Generates a getField instruction based on the type. + * + * @param buffer code buffer + * @param bci Bytecode iterator from which the bytecode was retrieved + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateGetField( + char *buffer, + TR_J9ByteCodeIterator *bci); + + /** + * Generates a putField instruction based on the type. + * + * @param buffer code buffer + * @param bci Bytecode iterator from which the bytecode was retrieved + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generatePutField( + char *buffer, + TR_J9ByteCodeIterator *bci); + + /** + * Generates a putStatic instruction based on the type. + * + * @param buffer code buffer + * @param bci Bytecode iterator from which the bytecode was retrieved + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generatePutStatic( + char *buffer, + TR_J9ByteCodeIterator *bci); + + /** + * Generates a getStatic instruction based on the type. + * + * @param buffer code buffer + * @param bci Bytecode iterator from which the bytecode was retrieved + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateGetStatic( + char *buffer, + TR_J9ByteCodeIterator *bci); + + /** + * Generates a return instruction based on the type. + * + * @param buffer code buffer + * @param dt The data type to be returned by the TR-compiled method + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateReturn( + char *buffer, + TR::DataType dt, + struct TR::X86LinkageProperties *properties); + + /** + * Generates a guarded counter for recompilation through JITed counting. 
+ * + * @param buffer code buffer + * @param initialCount invocations before compiling with TR + * @return size of generate code; 0 if method failed + */ + buffer_size_t + generateGCR( + char *buffer, + int32_t initialCount, + J9Method *method, + uintptr_t startPC); + + public: + CodeGenerator() = delete; + CodeGenerator( + struct J9JITConfig*, + J9VMThread*, + TR::FilePointer*, + TR_J9VMBase&, + ParamTable*, + TR::Compilation*, + MJIT::CodeGenGC*, + TR::PersistentInfo*, + TR_Memory*, + TR_ResolvedMethod*); + + inline void + setPeakStackSize(int32_t newSize) + { + _stackPeakSize = newSize; + } + + inline int32_t + getPeakStackSize() + { + return _stackPeakSize; + } + + /** + * Write the pre-prologue to the buffer and return the size written to the buffer. + * + * @param buffer code buffer + * @param method method for introspection + * @param magicWordLocation, location of variable to hold pointer to space allocated for magic word + * @param first2BytesPatchLocation, location of variable to hold pointer to first 2 bytes of method to be executed + * @param bodyInfo location of variable to hold pointer to the bodyInfo, allocated inside this method + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generatePrePrologue( + char *buffer, + J9Method *method, + char** magicWordLocation, + char** first2BytesPatchLocation, + char** samplingRecompileCallLocation); + + /** + * Write the prologue to the buffer and return the size written to the buffer. + * + * @param buffer code buffer + * @param method method for introspection + * @param jitStackOverflowJumpPatchLocation, location of variable to hold pointer to line that jumps to the stack overflow helper. + * @param magicWordLocation, pointer to magic word location, written to during prologue generation + * @param first2BytesPatchLocation, pointer to first 2 bytes location, written to during prologue generation + * @param firstInstLocation, location of variable to hold pointer to first instruction, later used to set entry point + * @param bci bytecode iterator, used to gather type information + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generatePrologue( + char *buffer, + J9Method *method, + char** jitStackOverflowJumpPatchLocation, + char *magicWordLocation, + char *first2BytesPatchLocation, + char *samplingRecompileCallLocation, + char** firstInstLocation, + TR_J9ByteCodeIterator *bci); + + /** + * Generates the cold area used for helpers, such as the jit stack overflow checker. + * + * @param buffer code buffer + * @param method method for introspection + * @param jitStackOverflowJumpPatchLocation location to be patched with jit stack overflow helper relative address. + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateColdArea( + char *buffer, + J9Method *method, + char *jitStackOverflowJumpPatchLocation); + + /** + * Generates the body for a method. + * + * @param buffer code buffer + * @param bci byte code iterator used in hot loop to generate body of compiled method + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateBody( + char *buffer, + TR_J9ByteCodeIterator *bci); + + /** + * Generates a profiling counter for an edge. 
+ * + * @param buffer code buffer + * @param bci byte code iterator, currently pointing to the edge that needs to be profiled + * @return size of generated code; 0 if method failed + */ + buffer_size_t + generateEdgeCounter( + char *buffer, + TR_J9ByteCodeIterator *bci); + + /** + * Generate an int3 for signaling debug breakpoint for Linux x86 + * + * @param buffer code buffer + */ + buffer_size_t + generateDebugBreakpoint(char *buffer); + + /** + * Allocate space in the code cache of size length and + * copy the buffer to the cache. + * + * @param length length of code in the buffer + * @param vmBase To check if compilation should be interrupted + * @param vmThread Thread Id is stored in cache + * @return pointer to the new code cache segment or NULL if failed. + */ + U_8 * + allocateCodeCache( + int32_t length, + TR_J9VMBase *vmBase, + J9VMThread *vmThread); + + /** + * Get the code cache + */ + TR::CodeCache *getCodeCache(); + + /** + * Get a pointer to the Stack Atlas + */ + TR::GCStackAtlas *getStackAtlas(); + + inline uint8_t + getPointerSize() + { + return _linkage._properties.getPointerSize(); + } + }; + +class MJITCompilationFailure: public virtual std::exception + { + public: + MJITCompilationFailure() { } + virtual const char *what() const throw() + { + return "Unable to compile method."; + } + }; + +} // namespace MJIT +#endif /* MJIT_AMD64CODEGEN_HPP */ diff --git a/runtime/compiler/microjit/x/amd64/AMD64CodegenGC.cpp b/runtime/compiler/microjit/x/amd64/AMD64CodegenGC.cpp new file mode 100644 index 00000000000..134c72d52ba --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/AMD64CodegenGC.cpp @@ -0,0 +1,155 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#include +#include +#include "microjit/x/amd64/AMD64CodegenGC.hpp" +#include "env/StackMemoryRegion.hpp" +#include "codegen/CodeGenerator.hpp" +#include "codegen/CodeGenerator_inlines.hpp" +#include "codegen/GCStackAtlas.hpp" +#include "codegen/GCStackMap.hpp" +#include "codegen/Linkage.hpp" +#include "codegen/Linkage_inlines.hpp" +#include "compile/Compilation.hpp" +#include "control/Options.hpp" +#include "control/Options_inlines.hpp" +#include "env/ObjectModel.hpp" +#include "env/CompilerEnv.hpp" +#include "env/TRMemory.hpp" +#include "env/jittypes.h" +#include "il/AutomaticSymbol.hpp" +#include "il/Node.hpp" +#include "il/ParameterSymbol.hpp" +#include "il/ResolvedMethodSymbol.hpp" +#include "il/Symbol.hpp" +#include "il/SymbolReference.hpp" +#include "infra/Assert.hpp" +#include "infra/BitVector.hpp" +#include "infra/IGNode.hpp" +#include "infra/InterferenceGraph.hpp" +#include "infra/List.hpp" +#include "microjit/SideTables.hpp" +#include "microjit/utils.hpp" +#include "ras/Debug.hpp" + +MJIT::CodeGenGC::CodeGenGC(TR::FilePointer *logFileFP) + : _logFileFP(logFileFP) + {} + +TR::GCStackAtlas * +MJIT::CodeGenGC::createStackAtlas(TR::Compilation *comp, MJIT::ParamTable *paramTable, MJIT::LocalTable *localTable) + { + + // -------------------------------------------------------------------------------- + // Construct the parameter map for mapped reference parameters + // + intptr_t stackSlotSize = TR::Compiler->om.sizeofReferenceAddress(); + U_16 paramCount = paramTable->getParamCount(); + U_16 ParamSlots = paramTable->getTotalParamSize()/stackSlotSize; + TR_GCStackMap *parameterMap = new (comp->trHeapMemory(), paramCount) TR_GCStackMap(paramCount); + + int32_t firstMappedParmOffsetInBytes = -1; + MJIT::ParamTableEntry entry; + for (int i=0; igetEntry(i, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + i++; + continue; + } + if (!entry.notInitialized && entry.isReference) + { + firstMappedParmOffsetInBytes = entry.gcMapOffset; + break; + } + i += entry.slots; + } + + for (int i=0; igetEntry(i, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + i++; + continue; + } + if (!entry.notInitialized && entry.isReference) + { + int32_t entryOffsetInBytes = entry.gcMapOffset; + parameterMap->setBit(((entryOffsetInBytes-firstMappedParmOffsetInBytes)/stackSlotSize)); + if (!entry.onStack) + { + parameterMap->setRegisterBits(TR::RealRegister::gprMask((TR::RealRegister::RegNum)entry.regNo)); + } + } + i += entry.slots; + } + + // -------------------------------------------------------------------------------- + // Construct the local map for mapped reference locals. + // This does not duplicate items mapped from the parameter table. + // + firstMappedParmOffsetInBytes = + (firstMappedParmOffsetInBytes >= 0) ? 
firstMappedParmOffsetInBytes : 0; + U_16 localCount = localTable->getLocalCount(); + TR_GCStackMap *localMap = new (comp->trHeapMemory(), localCount) TR_GCStackMap(localCount); + localMap->copy(parameterMap); + for (int i=paramCount; igetEntry(i, &entry), "Bad index for table entry"); + if (entry.notInitialized) + { + i++; + continue; + } + if (!entry.notInitialized && entry.offset == -1 && entry.isReference) + { + int32_t entryOffsetInBytes = entry.gcMapOffset; + localMap->setBit(((entryOffsetInBytes-firstMappedParmOffsetInBytes)/stackSlotSize)); + if (!entry.onStack) + { + localMap->setRegisterBits(TR::RealRegister::gprMask((TR::RealRegister::RegNum)entry.regNo)); + } + } + i += entry.slots; + } + + // -------------------------------------------------------------------------- + // Now create the stack atlas + // + TR::GCStackAtlas * atlas = new (comp->trHeapMemory()) TR::GCStackAtlas(paramCount, localCount, comp->trMemory()); + atlas->setParmBaseOffset(firstMappedParmOffsetInBytes); + atlas->setParameterMap(parameterMap); + atlas->setLocalMap(localMap); + // MJIT does not create stack allocated objects + atlas->setStackAllocMap(NULL); + // MJIT must initialize all locals which are not parameters + atlas->setNumberOfSlotsToBeInitialized(localCount - paramCount); + atlas->setIndexOfFirstSpillTemp(localCount); + // MJIT does not use internal pointers + atlas->setInternalPointerMap(NULL); + // MJIT does not mimic interpreter frame shape + atlas->setNumberOfPendingPushSlots(0); + atlas->setNumberOfPaddingSlots(0); + return atlas; + } diff --git a/runtime/compiler/microjit/x/amd64/AMD64CodegenGC.hpp b/runtime/compiler/microjit/x/amd64/AMD64CodegenGC.hpp new file mode 100644 index 00000000000..da2aba640a3 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/AMD64CodegenGC.hpp @@ -0,0 +1,43 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. 
+ * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#ifndef MJIT_AMD64_CODEGENGC_HPP +#define MJIT_AMD64_CODEGENGC_HPP +#include "codegen/GCStackAtlas.hpp" +#include "microjit/SideTables.hpp" +#include "env/IO.hpp" + +namespace MJIT +{ + +class CodeGenGC + { + + private: + TR::FilePointer *_logFileFP; + + public: + CodeGenGC(TR::FilePointer *logFileFP); + TR::GCStackAtlas *createStackAtlas(TR::Compilation*, MJIT::ParamTable*, MJIT::LocalTable*); + }; + +} // namespace MJIT +#endif /* MJIT_AMD64_CODEGENGC_HPP */ diff --git a/runtime/compiler/microjit/x/amd64/AMD64Linkage.cpp b/runtime/compiler/microjit/x/amd64/AMD64Linkage.cpp new file mode 100644 index 00000000000..864711d7393 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/AMD64Linkage.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#include "AMD64Linkage.hpp" + +/* +| Architecture | Endian | Return address | Argument registers | +| x86-64 | Little | Stack | RAX RSI RDX RCX | + +| Return value registers | Preserved registers | vTable index | +| EAX (32-bit) RAX (64-bit) XMM0 (float/double) | RBX R9 RSP | R8 (receiver in RAX) | +*/ + +MJIT::Linkage::Linkage(TR::CodeGenerator *cg) + { + // TODO: MicroJIT: Use the method setOffsetToFirstParm(RETURN_ADDRESS_SIZE); + _properties._offsetToFirstParm = RETURN_ADDRESS_SIZE; + setProperties(cg,&_properties); + } diff --git a/runtime/compiler/microjit/x/amd64/AMD64Linkage.hpp b/runtime/compiler/microjit/x/amd64/AMD64Linkage.hpp new file mode 100644 index 00000000000..47263dc3d26 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/AMD64Linkage.hpp @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2022, 2022 IBM Corp. 
and others + * + * This program and the accompanying materials are made available under + * the terms of the Eclipse Public License 2.0 which accompanies this + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ + * or the Apache License, Version 2.0 which accompanies this distribution and + * is available at https://www.apache.org/licenses/LICENSE-2.0. + * + * This Source Code may also be made available under the following + * Secondary Licenses when the conditions for such availability set + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU + * General Public License, version 2 with the GNU Classpath + * Exception [1] and GNU General Public License, version 2 with the + * OpenJDK Assembly Exception [2]. + * + * [1] https://www.gnu.org/software/classpath/license.html + * [2] http://openjdk.java.net/legal/assembly-exception.html + * + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + *******************************************************************************/ +#ifndef AMD64LINKAGE_HPP +#define AMD64LINKAGE_HPP + +#include "codegen/AMD64PrivateLinkage.hpp" +#include "codegen/OMRLinkage_inlines.hpp" + +/* +| Architecture | Endian | Return address | Argument registers | +| x86-64 | Little | Stack | RAX RSI RDX RCX | + +| Return value registers | Preserved registers | vTable index | +| EAX (32-bit) RAX (64-bit) XMM0 (float/double) | RBX R9 RSP | R8 (receiver in RAX) | +*/ + +namespace MJIT +{ + +enum + { + RETURN_ADDRESS_SIZE=8, + }; + +class Linkage +{ + +public: + struct TR::X86LinkageProperties _properties; + Linkage() = delete; + Linkage(TR::CodeGenerator*); +}; + +} // namespace MJIT +#endif /* AMD64LINKAGE_HPP */ diff --git a/runtime/compiler/microjit/x/amd64/CMakeLists.txt b/runtime/compiler/microjit/x/amd64/CMakeLists.txt new file mode 100644 index 00000000000..446738f44f4 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/CMakeLists.txt @@ -0,0 +1,27 @@ +################################################################################ +# Copyright (c) 2022, 2022 IBM Corp. and others +# +# This program and the accompanying materials are made available under +# the terms of the Eclipse Public License 2.0 which accompanies this +# distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +# or the Apache License, Version 2.0 which accompanies this distribution and +# is available at https://www.apache.org/licenses/LICENSE-2.0. +# +# This Source Code may also be made available under the following +# Secondary Licenses when the conditions for such availability set +# forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +# General Public License, version 2 with the GNU Classpath +# Exception [1] and GNU General Public License, version 2 with the +# OpenJDK Assembly Exception [2]. 
+# +# [1] https://www.gnu.org/software/classpath/license.html +# [2] http://openjdk.java.net/legal/assembly-exception.html +# +# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception +################################################################################ +j9jit_files( + microjit/x/amd64/AMD64Codegen.cpp + microjit/x/amd64/AMD64CodegenGC.cpp + microjit/x/amd64/AMD64Linkage.cpp +) +add_subdirectory(templates) diff --git a/runtime/compiler/microjit/x/amd64/templates/CMakeLists.txt b/runtime/compiler/microjit/x/amd64/templates/CMakeLists.txt new file mode 100644 index 00000000000..6e174d6e12b --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/templates/CMakeLists.txt @@ -0,0 +1,26 @@ +################################################################################ +# Copyright (c) 2022, 2022 IBM Corp. and others +# +# This program and the accompanying materials are made available under +# the terms of the Eclipse Public License 2.0 which accompanies this +# distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +# or the Apache License, Version 2.0 which accompanies this distribution and +# is available at https://www.apache.org/licenses/LICENSE-2.0. +# +# This Source Code may also be made available under the following +# Secondary Licenses when the conditions for such availability set +# forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +# General Public License, version 2 with the GNU Classpath +# Exception [1] and GNU General Public License, version 2 with the +# OpenJDK Assembly Exception [2]. +# +# [1] https://www.gnu.org/software/classpath/license.html +# [2] http://openjdk.java.net/legal/assembly-exception.html +# +# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception +################################################################################ +j9jit_files( + microjit/x/amd64/templates/microjit.nasm + microjit/x/amd64/templates/bytecodes.nasm + microjit/x/amd64/templates/linkage.nasm +) diff --git a/runtime/compiler/microjit/x/amd64/templates/bytecodes.nasm b/runtime/compiler/microjit/x/amd64/templates/bytecodes.nasm new file mode 100644 index 00000000000..fde94ab08b9 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/templates/bytecodes.nasm @@ -0,0 +1,1351 @@ +; Copyright (c) 2022, 2022 IBM Corp. and others +; +; This program and the accompanying materials are made available under +; the terms of the Eclipse Public License 2.0 which accompanies this +; distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +; or the Apache License, Version 2.0 which accompanies this distribution and +; is available at https://www.apache.org/licenses/LICENSE-2.0. +; +; This Source Code may also be made available under the following +; Secondary Licenses when the conditions for such availability set +; forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +; General Public License, version 2 with the GNU Classpath +; Exception [1] and GNU General Public License, version 2 with the +; OpenJDK Assembly Exception [2]. 
+; +; [1] https://www.gnu.org/software/classpath/license.html +; [2] http://openjdk.java.net/legal/assembly-exception.html +; +; SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception +%include "utils.nasm" +%include "microjit.nasm" + +;MicroJIT Virtual Machine to X86-64 mapping +; rsp: +; is the stack extent for the java value stack pointer. +; r10: +; will hold the java value stack pointer. +; r11: +; stores the accumulator, or +; stores a pointer to an object +; r12: +; stores any value which will act on the accumulator, or +; stores the value to be written to an object field, or +; stores the value read from an object field +; r13: +; holds addresses for absolute addressing +; r14: +; holds a pointer to the start of the local array +; r15: +; stores values loaded from memory for storing on the stack or in the local array + +template_start debugBreakpoint + int3 ; trigger hardware interrupt +template_end debugBreakpoint + +; All reference loads require adding stack space of 8 bytes +; and moving values from a stack slot to a temp register +template_start aloadTemplatePrologue + push_single_slot + _64bit_local_to_rXX_PATCH r15 +template_end aloadTemplatePrologue + +; All integer loads require adding stack space of 8 bytes +; and moving values from a stack slot to a temp register +template_start iloadTemplatePrologue + push_single_slot + _32bit_local_to_rXX_PATCH r15 +template_end iloadTemplatePrologue + +; All long loads require adding 2 stack spaces of 8 bytes (16 total) +; and moving values from a stack slot to a temp register +template_start lloadTemplatePrologue + push_dual_slot + _64bit_local_to_rXX_PATCH r15 +template_end lloadTemplatePrologue + +template_start loadTemplate + mov [r10], r15 +template_end loadTemplate + +template_start invokeStaticTemplate + mov rdi, qword 0xefbeaddeefbeadde +template_end invokeStaticTemplate + +template_start retTemplate_sub + sub r10, byte 0xFF +template_end retTemplate_sub + +template_start loadeaxReturn + _32bit_slot_stack_from_eXX ax,0 +template_end loadeaxReturn + +template_start loadraxReturn + _64bit_slot_stack_from_rXX rax,0 +template_end loadraxReturn + +template_start loadxmm0Return + movd [r10], xmm0 +template_end loadxmm0Return + +template_start loadDxmm0Return + movq [r10], xmm0 +template_end loadDxmm0Return + +template_start moveeaxForCall + _32bit_slot_stack_to_eXX ax,0 + pop_single_slot +template_end moveeaxForCall + +template_start moveesiForCall + _32bit_slot_stack_to_eXX si,0 + pop_single_slot +template_end moveesiForCall + +template_start moveedxForCall + _32bit_slot_stack_to_eXX dx,0 + pop_single_slot +template_end moveedxForCall + +template_start moveecxForCall + _32bit_slot_stack_to_eXX cx,0 + pop_single_slot +template_end moveecxForCall + +template_start moveraxRefForCall + _64bit_slot_stack_to_rXX rax,0 + pop_single_slot +template_end moveraxRefForCall + +template_start moversiRefForCall + _64bit_slot_stack_to_rXX rsi,0 + pop_single_slot +template_end moversiRefForCall + +template_start moverdxRefForCall + _64bit_slot_stack_to_rXX rdx,0 + pop_single_slot +template_end moverdxRefForCall + +template_start movercxRefForCall + _64bit_slot_stack_to_rXX rcx,0 + pop_single_slot +template_end movercxRefForCall + +template_start moveraxForCall + _64bit_slot_stack_to_rXX rax,0 + pop_dual_slot +template_end moveraxForCall + +template_start moversiForCall + _64bit_slot_stack_to_rXX rsi,0 + pop_dual_slot +template_end moversiForCall + +template_start 
moverdxForCall + _64bit_slot_stack_to_rXX rdx,0 + pop_dual_slot +template_end moverdxForCall + +template_start movercxForCall + _64bit_slot_stack_to_rXX rcx,0 + pop_dual_slot +template_end movercxForCall + +template_start movexmm0ForCall + psrldq xmm0, 8 ; clear xmm0 by shifting right + movss xmm0, [r10] + pop_single_slot +template_end movexmm0ForCall + +template_start movexmm1ForCall + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm1, [r10] + pop_single_slot +template_end movexmm1ForCall + +template_start movexmm2ForCall + psrldq xmm2, 8 ; clear xmm2 by shifting right + movss xmm2, [r10] + pop_single_slot +template_end movexmm2ForCall + +template_start movexmm3ForCall + psrldq xmm3, 8 ; clear xmm3 by shifting right + movss xmm3, [r10] + pop_single_slot +template_end movexmm3ForCall + +template_start moveDxmm0ForCall + psrldq xmm0, 8 ; clear xmm0 by shifting right + movsd xmm0, [r10] + pop_dual_slot +template_end moveDxmm0ForCall + +template_start moveDxmm1ForCall + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm1, [r10] + pop_dual_slot +template_end moveDxmm1ForCall + +template_start moveDxmm2ForCall + psrldq xmm2, 8 ; clear xmm2 by shifting right + movsd xmm2, [r10] + pop_dual_slot +template_end moveDxmm2ForCall + +template_start moveDxmm3ForCall + psrldq xmm3, 8 ; clear xmm3 by shifting right + movsd xmm3, [r10] + pop_dual_slot +template_end moveDxmm3ForCall + +template_start astoreTemplate + _64bit_slot_stack_to_rXX r15,0 + pop_single_slot + _64bit_local_from_rXX_PATCH r15 +template_end astoreTemplate + +template_start istoreTemplate + _32bit_slot_stack_to_rXX r15,0 + pop_single_slot + _32bit_local_from_rXX_PATCH r15 +template_end istoreTemplate + +template_start lstoreTemplate + _64bit_slot_stack_to_rXX r15,0 + pop_dual_slot + _64bit_local_from_rXX_PATCH r15 +template_end lstoreTemplate + +; stack before: ..., value → +; stack after: ... +template_start popTemplate + pop_single_slot ; remove the top value of the stack +template_end popTemplate + +; stack before: ..., value2, value1 → +; stack after: ... 
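+; pop2 removes either one category-2 value (long/double) or two category-1 values;
+; since MicroJIT gives every JVM stack entry a full 8-byte computation-stack slot,
+; both cases reduce to moving the computation stack pointer by two slots.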
+template_start pop2Template + pop_dual_slot ; remove the top two values from the stack +template_end pop2Template + +; stack before: ..., value2, value1 → +; stack after: ..., value1, value2 +template_start swapTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy value1 into r12 + _64bit_slot_stack_to_rXX r11,8 ; copy value2 into r11 + _64bit_slot_stack_from_rXX r12,8 ; write value1 to the second stack slot + _64bit_slot_stack_from_rXX r11,0 ; write value2 to the top stack slot +template_end swapTemplate + +; stack before: ..., value1 → +; stack after: ..., value1, value1 +template_start dupTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy the top value into r12 + push_single_slot ; increase the stack by 1 slot + _64bit_slot_stack_from_rXX r12,0 ; push the duplicate on top of the stack +template_end dupTemplate + +; stack before: ..., value2, value1 → +; stack after: ..., value1, value2, value1 +template_start dupx1Template + _64bit_slot_stack_to_rXX r12,0 ; copy value1 into r12 + _64bit_slot_stack_to_rXX r11,8 ; copy value2 into r11 + _64bit_slot_stack_from_rXX r12,8 ; store value1 in third stack slot + _64bit_slot_stack_from_rXX r11,0 ; store value2 in second stack slot + push_single_slot ; increase stack by 1 slot + _64bit_slot_stack_from_rXX r12,0 ; store value1 on top stack slot +template_end dupx1Template + +; stack before: ..., value3, value2, value1 → +; stack after: ..., value1, value3, value2, value1 +template_start dupx2Template + _64bit_slot_stack_to_rXX r12,0 ; copy value1 into r12 + _64bit_slot_stack_to_rXX r11,8 ; copy value2 into r11 + _64bit_slot_stack_to_rXX r15,16 ; copy value3 into r15 + _64bit_slot_stack_from_rXX r12,16 ; store value1 in fourth stack slot + _64bit_slot_stack_from_rXX r15,8 ; store value3 in third stack slot + _64bit_slot_stack_from_rXX r11,0 ; store value2 in second stack slot + push_single_slot ; increase stack by 1 slot + _64bit_slot_stack_from_rXX r12,0 ; store value1 in top stack slot +template_end dupx2Template + +; stack before: ..., value2, value1 → +; stack after: ..., value2, value1, value2, value1 +template_start dup2Template + _64bit_slot_stack_to_rXX r12,0 ; copy value1 into r12 + _64bit_slot_stack_to_rXX r11,8 ; copy value2 into r11 + push_dual_slot ; increase stack by 2 slots + _64bit_slot_stack_from_rXX r11,8 ; store value2 in second stack slot + _64bit_slot_stack_from_rXX r12,0 ; store value1 in top stack slot +template_end dup2Template + +; stack before: ..., value3, value2, value1 → +; stack after: ..., value2, value1, value3, value2, value1 +template_start dup2x1Template + _64bit_slot_stack_to_rXX r12,0 ; copy value1 into r12 + _64bit_slot_stack_to_rXX r11,8 ; copy value2 into r11 + _64bit_slot_stack_to_rXX r15,16 ; copy value3 into r15 + _64bit_slot_stack_from_rXX r11,16 ; store value2 in fifth stack slot + _64bit_slot_stack_from_rXX r12,8 ; store value1 in fourth stack slot + _64bit_slot_stack_from_rXX r15,0 ; store value3 in third stack slot + push_dual_slot ; increase stack by 2 slots + _64bit_slot_stack_from_rXX r11,8 ; store value2 in second stack slot + _64bit_slot_stack_from_rXX r12,0 ; store value in top stack slot +template_end dup2x1Template + +; stack before: ..., value4, value3, value2, value1 → +; stack after: ..., value2, value1, value4, value3, value2, value1 +template_start dup2x2Template + _64bit_slot_stack_to_rXX r11,0 ; mov value1 into r11 + _64bit_slot_stack_to_rXX r12,8 ; mov value2 into r12 + _64bit_slot_stack_to_rXX r15,16 ; mov value3 into r15 + _64bit_slot_stack_from_rXX r11,16 ; mov value1 into old value3 slot 
+ _64bit_slot_stack_from_rXX r15,0 ; mov value3 into old value1 slot + _64bit_slot_stack_to_rXX r15,24 ; mov value4 into r15 + _64bit_slot_stack_from_rXX r15,8 ; mov value4 into old value2 slot + _64bit_slot_stack_from_rXX r12,24 ; mov value2 into old value4 slot + push_single_slot ; increase the stack + _64bit_slot_stack_from_rXX r12,0 ; mov value2 into second slot + push_single_slot ; increase the stack + _64bit_slot_stack_from_rXX r11,0 ; mov value1 into top stack slot +template_end dup2x2Template + +template_start getFieldTemplatePrologue + _64bit_slot_stack_to_rXX r11,0 ; get the objectref from the stack + lea r13, [r11 + 0xefbeadde] ; load the effective address of the object field +template_end getFieldTemplatePrologue + +template_start intGetFieldTemplate + mov r12d, dword [r13] ; load only 32-bits + _32bit_slot_stack_from_rXX r12,0 ; put it on the stack +template_end intGetFieldTemplate + +template_start addrGetFieldTemplatePrologue + mov r12d, dword [r13] ; load only 32-bits +template_end addrGetFieldTemplatePrologue + +template_start decompressReferenceTemplate + shl r12, 0xff ; Reference from field is compressed, decompress by shift amount +template_end decompressReferenceTemplate + +template_start decompressReference1Template + shl r12, 0x01 ; Reference from field is compressed, decompress by shift amount +template_end decompressReference1Template + +template_start addrGetFieldTemplate + _64bit_slot_stack_from_rXX r12,0 ; put it on the stack +template_end addrGetFieldTemplate + +template_start longGetFieldTemplate + push_single_slot ; increase the stack pointer to make room for extra slot + mov r12, [r13] ; load the value + _64bit_slot_stack_from_rXX r12,0 ; put it on the stack +template_end longGetFieldTemplate + +template_start floatGetFieldTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + movss xmm0, dword [r13] ; load the value + movss [r10], xmm0 ; put it on the stack +template_end floatGetFieldTemplate + +template_start doubleGetFieldTemplate + push_single_slot ; increase the stack pointer to make room for extra slot + movsd xmm0, [r13] ; load the value + movsd [r10], xmm0 ; put it on the stack +template_end doubleGetFieldTemplate + +template_start intPutFieldTemplatePrologue + _32bit_slot_stack_to_rXX r11,0 ; copy value into r11 + pop_single_slot ; reduce stack pointer + _64bit_slot_stack_to_rXX r12,0 ; get the object reference + pop_single_slot ; reduce the stack pointer + lea r13, [r12 + 0xefbeadde] ; load the effective address of the object field +template_end intPutFieldTemplatePrologue + +template_start addrPutFieldTemplatePrologue + _64bit_slot_stack_to_rXX r11,0 ; copy value into r11 + pop_single_slot ; reduce stack pointer + _64bit_slot_stack_to_rXX r12,0 ; get the object reference + pop_single_slot ; reduce the stack pointer + lea r13, [r12 + 0xefbeadde] ; load the effective address of the object field +template_end addrPutFieldTemplatePrologue + +template_start longPutFieldTemplatePrologue + _64bit_slot_stack_to_rXX r11,0 ; copy value into r11 + pop_dual_slot ; reduce stack pointer + _64bit_slot_stack_to_rXX r12,0 ; get the object reference + pop_single_slot ; reduce the stack pointer + lea r13, [r12 + 0xefbeadde] ; load the effective address of the object field +template_end longPutFieldTemplatePrologue + +template_start floatPutFieldTemplatePrologue + movss xmm0, [r10] ; copy value into r11 + pop_single_slot ; reduce stack pointer + _64bit_slot_stack_to_rXX r12,0 ; get the object reference + pop_single_slot ; reduce the stack pointer + lea r13, [r12 
+ 0xefbeadde] ; load the effective address of the object field +template_end floatPutFieldTemplatePrologue + +template_start doublePutFieldTemplatePrologue + movsd xmm0, [r10] ; copy value into r11 + pop_dual_slot ; reduce stack pointer + _64bit_slot_stack_to_rXX r12,0 ; get the object reference + pop_single_slot ; reduce the stack pointer + lea r13, [r12 + 0xefbeadde] ; load the effective address of the object field +template_end doublePutFieldTemplatePrologue + +template_start intPutFieldTemplate + mov [r13], r11d ; Move value to memory +template_end intPutFieldTemplate + +template_start compressReferenceTemplate + shr r11, 0xff +template_end compressReferenceTemplate + +template_start compressReference1Template + shr r11, 0x01 +template_end compressReference1Template + +template_start longPutFieldTemplate + mov [r13], r11 ; Move value to memory +template_end longPutFieldTemplate + +template_start floatPutFieldTemplate + movss [r13], xmm0 ; Move value to memory +template_end floatPutFieldTemplate + +template_start doublePutFieldTemplate + movsd [r13], xmm0 ; Move value to memory +template_end doublePutFieldTemplate + +template_start staticTemplatePrologue + lea r13, [0xefbeadde] ; load the absolute address of a static value +template_end staticTemplatePrologue + +template_start intGetStaticTemplate + push_single_slot ; allocate a stack slot + mov r11d, dword [r13] ; load 32-bit value into r11 + _32bit_slot_stack_from_rXX r11,0 ; move it onto the stack +template_end intGetStaticTemplate + +template_start addrGetStaticTemplatePrologue + push_single_slot ; allocate a stack slot + mov r11, qword [r13] ; load 32-bit value into r11 +template_end addrGetStaticTemplatePrologue + +template_start addrGetStaticTemplate + _64bit_slot_stack_from_rXX r11,0 ; move it onto the stack +template_end addrGetStaticTemplate + +template_start longGetStaticTemplate + push_dual_slot ; allocate a stack slot + mov r11, [r13] ; load 64-bit value into r11 + _64bit_slot_stack_from_rXX r11,0 ; move it onto the stack +template_end longGetStaticTemplate + +template_start floatGetStaticTemplate + push_single_slot ; allocate a stack slot + movss xmm0, [r13] ; load value into r11 + movss [r10], xmm0 ; move it onto the stack +template_end floatGetStaticTemplate + +template_start doubleGetStaticTemplate + push_dual_slot ; allocate a stack slot + movsd xmm0, [r13] ; load value into r11 + movsd [r10], xmm0 ; move it onto the stack +template_end doubleGetStaticTemplate + +template_start intPutStaticTemplate + _32bit_slot_stack_to_rXX r11,0 ; Move stack value into r11 + pop_single_slot ; Pop the value off the stack + mov dword [r13], r11d ; Move it to memory +template_end intPutStaticTemplate + +template_start addrPutStaticTemplatePrologue + _64bit_slot_stack_to_rXX r11,0 ; Move stack value into r11 + pop_single_slot ; Pop the value off the stack +template_end addrPutStaticTemplatePrologue + +template_start addrPutStaticTemplate + mov qword [r13], r11 ; Move it to memory +template_end addrPutStaticTemplate + +template_start longPutStaticTemplate + _64bit_slot_stack_to_rXX r11,0 ; Move stack value into r11 + pop_dual_slot ; Pop the value off the stack + mov [r13], r11 ; Move it to memory +template_end longPutStaticTemplate + +template_start floatPutStaticTemplate + movss xmm0, [r10] ; Move stack value into r11 + pop_single_slot ; Pop the value off the stack + movss [r13], xmm0 ; Move it to memory +template_end floatPutStaticTemplate + +template_start doublePutStaticTemplate + movsd xmm0, [r10] ; Move stack value into r11 + 
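+    ; the double is carried in xmm0 here; r13, set earlier by staticTemplatePrologue,
+    ; already holds the patched address of the static field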
pop_dual_slot ; Pop the value off the stack + movsd [r13], xmm0 ; Move it to memory +template_end doublePutStaticTemplate + +template_start iAddTemplate + _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_single_slot ; which means reducing the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_rXX r12,0 ; copy second value to the value register + add r11d, r12d ; add the value to the accumulator + _32bit_slot_stack_from_rXX r11,0 ; write the accumulator over the second arg +template_end iAddTemplate + +template_start iSubTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the value register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_rXX r11,0 ; copy second value to the accumulator register + sub r11d, r12d ; subtract the value from the accumulator + _32bit_slot_stack_from_rXX r11,0 ; write the accumulator over the second arg +template_end iSubTemplate + +template_start iMulTemplate + _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_single_slot ; which means reducing the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_rXX r12,0 ; copy second value to the value register + imul r11d, r12d ; multiply accumulator by the value to the accumulator + _32bit_slot_stack_from_rXX r11,0 ; write the accumulator over the second arg +template_end iMulTemplate + +template_start iDivTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the value register (divisor) + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_eXX ax,0 ; copy second value (dividend) to eax + cdq ; extend sign bit from eax to edx + idiv r12d ; divide edx:eax by lower 32 bits of r12 value + _32bit_slot_stack_from_eXX ax,0 ; store the quotient in accumulator +template_end iDivTemplate + +template_start iRemTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the value register (divisor) + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_eXX ax,0 ; copy second value (dividend) to eax + cdq ; extend sign bit from eax to edx + idiv r12d ; divide edx:eax by lower 32 bits of r12 value + _32bit_slot_stack_from_eXX dx,0 ; store the remainder in accumulator +template_end iRemTemplate + +template_start iNegTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy the top value from the stack into r12 + neg r12d ; two's complement negation of r12 + _32bit_slot_stack_from_rXX r12,0 ; store the result back on the stack +template_end iNegTemplate + +template_start iShlTemplate + mov cl, [r10] ; copy top value of stack in the cl register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_eXX ax,0 ; copy second value to eax + sal eax, cl ; arithmetic left shift eax by cl bits + _32bit_slot_stack_from_eXX ax,0 ; store the result back on the stack +template_end iShlTemplate + +template_start iShrTemplate + mov cl, [r10] ; copy top value of stack in the cl register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_eXX ax,0 ; copy second value to eax + sar eax, cl ; arithmetic right shift eax by cl bits + _32bit_slot_stack_from_eXX ax,0 ; store the result back on the stack +template_end iShrTemplate + +template_start iUshrTemplate + mov cl, [r10] ; copy top value of stack in the cl register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _32bit_slot_stack_to_eXX ax,0 ; copy second value to eax + shr eax, cl ; logical right shift eax 
by cl bits + _32bit_slot_stack_from_eXX ax,0 ; store the result back on the stack +template_end iUshrTemplate + +template_start iAndTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the accumulator (first operand) + pop_single_slot ; reduce the stack size by 1 slot + _32bit_slot_stack_to_rXX r11,0 ; copy the second value into a register (second operand) + and r11d, r12d ; perform the bitwise and operation + _32bit_slot_stack_from_rXX r11,0 ; store the result back on the java stack +template_end iAndTemplate + +template_start iOrTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the accumulator (first operand) + pop_single_slot ; reduce the stack size by 1 slot + _32bit_slot_stack_to_rXX r11,0 ; copy the second value into a register (second operand) + or r11d, r12d ; perform the bitwise or operation + _32bit_slot_stack_from_rXX r11,0 ; store the result back on the java stack +template_end iOrTemplate + +template_start iXorTemplate + _32bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the accumulator (first operand) + pop_single_slot ; reduce the stack size by 1 slot + _32bit_slot_stack_to_rXX r11,0 ; copy the second value into a register (second operand) + xor r11d, r12d ; perform the bitwise xor operation + _32bit_slot_stack_from_rXX r11,0 ; store the result back on the java stack +template_end iXorTemplate + +template_start i2lTemplate + movsxd r12, dword [r10] ; takes the bottom 32 from the stack bits and sign extends + push_single_slot ; add slot for long (1 slot -> 2 slot) + _64bit_slot_stack_from_rXX r12,0 ; push the result back on the stack +template_end i2lTemplate + +template_start l2iTemplate + movsxd r12, [r10] ; takes the bottom 32 from the stack bits and sign extends + pop_single_slot ; remove extra slot for long (2 slot -> 1 slot) + _32bit_slot_stack_from_rXX r12,0 ; push the result back on the stack +template_end l2iTemplate + +template_start i2bTemplate + movsx r12, byte [r10] ; takes the top byte from the stack and sign extends + _32bit_slot_stack_from_rXX r12,0 ; push the result back on the stack +template_end i2bTemplate + +template_start i2sTemplate + movsx r12, word [r10] ; takes the top word from the stack and sign extends + _32bit_slot_stack_from_rXX r12,0 ; push the result back on the stack +template_end i2sTemplate + +template_start i2cTemplate + movzx r12, word [r10] ; takes the top word from the stack and zero extends + _32bit_slot_stack_from_rXX r12,0 ; push the result back on the stack +template_end i2cTemplate + +template_start i2dTemplate + cvtsi2sd xmm0, dword [r10] ; convert signed 32 bit integer to double-presision fp + push_single_slot ; make room for the double + movsd [r10], xmm0 ; push on top of stack +template_end i2dTemplate + +template_start l2dTemplate + cvtsi2sd xmm0, qword [r10] ; convert signed 64 bit integer to double-presision fp + movsd [r10], xmm0 ; push on top of stack +template_end l2dTemplate + +template_start d2iTemplate + movsd xmm0, [r10] ; load value from top of stack + mov r11, 2147483647 ; store max int in r11 + mov r12, -2147483648 ; store min int in r12 + mov rcx, 0 ; store 0 in rcx + cvtsi2sd xmm1, r11d ; convert max int to double + cvtsi2sd xmm2, r12d ; convert min int to double + cvttsd2si rax, xmm0 ; convert double to int with truncation + ucomisd xmm0, xmm1 ; compare value with max int + cmovae rax, r11 ; if value is above or equal to max int, store max int in rax + ucomisd xmm0, xmm2 ; compare value with min int + cmovb rax, r12 ; if value is below min int, store min int in rax 
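+    ; ucomisd flags a NaN operand as "unordered" by setting the parity flag,
+    ; so the cmovp below overrides the result with 0, as the JVM requires for d2i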
+ cmovp rax, rcx ; if value is NaN, store 0 in rax + pop_single_slot ; remove extra slot for double (2 slot -> 1 slot) + _64bit_slot_stack_from_rXX rax,0 ; store result back on stack +template_end d2iTemplate + +template_start d2lTemplate + movsd xmm0, [r10] ; load value from top of stack + mov r11, 9223372036854775807 ; store max long in r11 + mov r12, -9223372036854775808 ; store min long in r12 + mov rcx, 0 ; store 0 in rcx + cvtsi2sd xmm1, r11 ; convert max long to double + cvtsi2sd xmm2, r12 ; convert min long to double + cvttsd2si rax, xmm0 ; convert double to long with truncation + ucomisd xmm0, xmm1 ; compare value with max int + cmovae rax, r11 ; if value is above or equal to max long, store max int in rax + ucomisd xmm0, xmm2 ; compare value with min int + cmovb rax, r12 ; if value is below min int, store min int in rax + cmovp rax, rcx ; if value is NaN, store 0 in rax + _64bit_slot_stack_from_rXX rax,0 ; store back on stack +template_end d2lTemplate + +template_start iconstm1Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], -1 ; push -1 to the java stack +template_end iconstm1Template + +template_start iconst0Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], 0 ; push 0 to the java stack +template_end iconst0Template + +template_start iconst1Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], 1 ; push 1 to the java stack +template_end iconst1Template + +template_start iconst2Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], 2 ; push 2 to the java stack +template_end iconst2Template + +template_start iconst3Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], 3 ; push 3 to the java stack +template_end iconst3Template + +template_start iconst4Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], 4 ; push 4 to the java stack +template_end iconst4Template + +template_start iconst5Template + push_single_slot ; increase the java stack size by 1 slot (8 bytes) + mov qword [r10], 5 ; push 5 to the java stack +template_end iconst5Template + +template_start bipushTemplate + push_single_slot ; increase the stack by 1 slot + mov qword [r10], 0xFF ; push immediate value to stack +template_end bipushTemplate + +template_start sipushTemplatePrologue + mov r11w, 0xFF ; put immediate value in accumulator for sign extension +template_end sipushTemplatePrologue + +template_start sipushTemplate + movsx r11, r11w ; sign extend the contents of r11w through the rest of r11 + push_single_slot ; increase the stack by 1 slot + _32bit_slot_stack_from_rXX r11,0 ; push immediate value to stack +template_end sipushTemplate + +template_start iIncTemplate_01_load + _32bit_local_to_rXX_PATCH r11 +template_end iIncTemplate_01_load + +template_start iIncTemplate_02_add + add r11, byte 0xFF +template_end iIncTemplate_02_add + +template_start iIncTemplate_03_store + _32bit_local_from_rXX_PATCH r11 +template_end iIncTemplate_03_store + +template_start lAddTemplate + _64bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r12,0 ; copy second value to the value register + add r11, r12 ; add the value to the accumulator + _64bit_slot_stack_from_rXX r11,0 ; write the accumulator over the second arg +template_end lAddTemplate + 
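+; Longs follow the JVM's two-slot convention: each long occupies a pair of 8-byte
+; computation-stack slots, with the 64-bit value kept in the slot at [r10] and the
+; second slot effectively padding. The binary long templates read the top operand,
+; pop_dual_slot to discard its pair, and overwrite the other operand's value slot
+; in place with the result.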
+template_start lSubTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the value register + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r11,0 ; copy second value to the accumulator register + sub r11, r12 ; subtract the value from the accumulator + _64bit_slot_stack_from_rXX r11,0 ; write the accumulator over the second arg +template_end lSubTemplate + +template_start lMulTemplate + _64bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r12,0 ; copy second value to the value register + imul r11, r12 ; multiply accumulator by the value to the accumulator + _64bit_slot_stack_from_rXX r11,0 ; write the accumulator over the second arg +template_end lMulTemplate + +template_start lDivTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the value register (divisor) + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX rax,0 ; copy second value (dividend) to rax + cqo ; extend sign bit from rax to rdx + idiv r12 ; divide rdx:rax by r12 value + _64bit_slot_stack_from_rXX rax,0 ; store the quotient in accumulator +template_end lDivTemplate + +template_start lRemTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the value register (divisor) + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX rax,0 ; copy second value (dividend) to rax + cqo ; extend sign bit from rax to rdx + idiv r12 ; divide rdx:rax by r12 value + _64bit_slot_stack_from_rXX rdx,0 ; store the remainder in accumulator +template_end lRemTemplate + +template_start lNegTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy the top value from the stack into r12 + neg r12 ; two's complement negation of r12 + _64bit_slot_stack_from_rXX r12,0 ; store the result back on the stack +template_end lNegTemplate + +template_start lShlTemplate + mov cl, [r10] ; copy top value of stack in the cl register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _64bit_slot_stack_to_rXX rax,0 ; copy second value to rax + sal rax, cl ; arithmetic left shift rax by cl bits + _64bit_slot_stack_from_rXX rax,0 ; store the result back on the stack +template_end lShlTemplate + +template_start lShrTemplate + mov cl, [r10] ; copy top value of stack in the cl register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _64bit_slot_stack_to_rXX rax,0 ; copy second value to rax + sar rax, cl ; arithmetic right shift rax by cl bits + _64bit_slot_stack_from_rXX rax,0 ; store the result back on the stack +template_end lShrTemplate + +template_start lUshrTemplate + mov cl, [r10] ; copy top value of stack in the cl register + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + _64bit_slot_stack_to_rXX rax,0 ; copy second value to rax + shr rax, cl ; logical right shift rax by cl bits + _64bit_slot_stack_from_rXX rax,0 ; store the result back on the stack +template_end lUshrTemplate + +template_start lAndTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the accumulator (first operand) + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r11,0 ; copy the second value into a register (second operand) + and r11, r12 ; perform the bitwise and operation + _64bit_slot_stack_from_rXX r11,0 ; store the result back on the java stack +template_end lAndTemplate + +template_start lOrTemplate + 
_64bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the accumulator (first operand) + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r11,0 ; copy the second value into a register (second operand) + or r11 , r12 ; perform the bitwise or operation + _64bit_slot_stack_from_rXX r11,0 ; store the result back on the java stack +template_end lOrTemplate + +template_start lXorTemplate + _64bit_slot_stack_to_rXX r12,0 ; copy top value of stack in the accumulator (first operand) + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r11,0 ; copy the second value into a register (second operand) + xor r11 , r12 ; perform the bitwise xor operation + _64bit_slot_stack_from_rXX r11,0 ; store the result back on the java stack +template_end lXorTemplate + +template_start lconst0Template + push_dual_slot ; increase the stack size by 2 slots (16 bytes) + mov qword [r10], 0 ; push 0 to the java stack +template_end lconst0Template + +template_start lconst1Template + push_dual_slot ; increase the stack size by 2 slots (16 bytes) + mov qword [r10], 1 ; push 1 to the java stack +template_end lconst1Template + +template_start fAddTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm1, [r10] ; copy top value of stack in xmm1 + pop_single_slot ; reduce the stack size by 1 slot + movss xmm0, [r10] ; copy the second value in xmm0 + addss xmm0, xmm1 ; add the values in xmm registers and store in xmm0 + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end fAddTemplate + +template_start fSubTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm1, [r10] ; copy top value of stack in xmm1 + pop_single_slot ; reduce the stack size by 1 slot + movss xmm0, [r10] ; copy the second value in xmm0 + subss xmm0, xmm1 ; subtract the value of xmm1 from xmm0 and store in xmm0 + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end fSubTemplate + +template_start fMulTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm1, [r10] ; copy top value of stack in xmm1 + pop_single_slot ; reduce the stack size by 1 slot + movss xmm0, [r10] ; copy the second value in xmm0 + mulss xmm0, xmm1 ; multiply the values in xmm registers and store in xmm0 + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end fMulTemplate + +template_start fDivTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm1, [r10] ; copy top value of stack in xmm1 + pop_single_slot ; reduce the stack size by 1 slot + movss xmm0, [r10] ; copy the second value in xmm0 + divss xmm0, xmm1 ; divide the value of xmm0 by xmm1 and store in xmm0 + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end fDivTemplate + +template_start fRemTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm0, [r10] ; copy top value of stack in xmm0 + pop_single_slot ; reduce the stack size by 1 slot + movss xmm1, [r10] ; copy the second value in xmm1 + call 0xefbeadde ; for function call +template_end fRemTemplate + +template_start fNegTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movss xmm0, [r10] ; copy top value of stack in xmm0 + mov eax, 0x80000000 ; loading mask 
immediate value to xmm1 through eax + movd xmm1, eax ; as it can't be directly loaded to xmm registers + xorps xmm0, xmm1 ; XOR xmm0 and xmm1 mask bits to negate xmm0 + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end fNegTemplate + +template_start fconst0Template + push_single_slot ; increase the java stack size by 1 slot + fldz ; push 0 onto the fpu register stack + fbstp [r10] ; stores st0 on the java stack +template_end fconst0Template + +template_start fconst1Template + push_single_slot ; increase the java stack size by 1 slot + fld1 ; push 1 onto the fpu register stack + fbstp [r10] ; stores st0 on the java stack +template_end fconst1Template + +template_start fconst2Template + push_single_slot ; increase the java stack size by 1 slot + fld1 ; push 1 on the fpu register stack + fld1 ; push another 1 on the fpu register stack + fadd st0, st1 ; add the top two values together and store them back on the top of the stack + fbstp [r10] ; stores st0 on the java stack +template_end fconst2Template + +template_start dconst0Template + push_dual_slot ; increase the java stack size by 2 slots + fldz ; push 0 onto the fpu register stack + fbstp [r10] ; stores st0 on the java stack +template_end dconst0Template + +template_start dconst1Template + push_dual_slot ; increase the java stack size by 2 slots + fld1 ; push 1 onto the fpu register stack + fbstp [r10] ; stores st0 on the java stack +template_end dconst1Template + +template_start dAddTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm1, [r10] ; copy top value of stack in xmm1 + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + movsd xmm0, [r10] ; copy the second value in xmm0 + addsd xmm0, xmm1 ; add the values in xmm registers and store in xmm0 + movsd [r10], xmm0 ; store the result in xmm0 on the java stack +template_end dAddTemplate + +template_start dSubTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm1, [r10] ; copy top value of stack in xmm1 + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + movsd xmm0, [r10] ; copy the second value in xmm0 + subsd xmm0, xmm1 ; subtract the value of xmm1 from xmm0 and store in xmm0 + movsd [r10], xmm0 ; store the result in xmm0 on the java stack +template_end dSubTemplate + +template_start dMulTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm1, [r10] ; copy top value of stack in xmm1 + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + movsd xmm0, [r10] ; copy the second value in xmm0 + mulsd xmm0, xmm1 ; multiply the values in xmm registers and store in xmm0 + movsd [r10], xmm0 ; store the result in xmm0 on the java stack +template_end dMulTemplate + +template_start dDivTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm1, [r10] ; copy top value of stack in xmm1 + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + movsd xmm0, [r10] ; copy the second value in xmm0 + divsd xmm0, xmm1 ; divide the value of xmm0 by xmm1 and store in xmm0 + movsd [r10], xmm0 ; store the result in xmm0 on the java stack +template_end dDivTemplate + +template_start dRemTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm0, [r10] ; copy top value of stack in xmm0 + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + 
movsd xmm1, [r10] ; copy the second value in xmm1 + call 0xefbeadde ; for function call +template_end dRemTemplate + +template_start dNegTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + movsd xmm0, [r10] ; copy top value of stack in xmm0 + mov rax, 0x8000000000000000 ; loading mask immediate value to xmm1 through rax + movq xmm1, rax ; as it can't be directly loaded to xmm registers + xorpd xmm0, xmm1 ; XOR xmm0 and xmm1 mask bits to negate xmm0 + movsd [r10], xmm0 ; store the result in xmm0 on the java stack +template_end dNegTemplate + +template_start i2fTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + cvtsi2ss xmm0, [r10] ; convert integer to float and store in xmm0 + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end i2fTemplate + +template_start f2iTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + psrldq xmm2, 8 ; clear xmm2 by shifting right + movss xmm0, [r10] ; copy top value of stack in xmm0 + mov r11d, 2147483647 ; store max int in r11 + mov r12d, -2147483648 ; store min int in r12 + mov ecx, 0 ; store 0 in rcx + cvtsi2ss xmm1, r11d ; convert max int to float + cvtsi2ss xmm2, r12d ; convert min int to float + cvttss2si eax, xmm0 ; convert float to int with truncation + ucomiss xmm0, xmm1 ; compare value with max int + cmovae eax, r11d ; if value is above or equal to max int, store max int in rax + ucomiss xmm0, xmm2 ; compare value with min int + cmovb eax, r12d ; if value is below min int, store min int in rax + cmovp eax, ecx ; if value is NaN, store 0 in rax + _32bit_slot_stack_from_eXX ax,0 ; store result back on stack +template_end f2iTemplate + +template_start l2fTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + cvtsi2ss xmm0, qword [r10] ; convert long to float and store in xmm0 + pop_single_slot ; remove extra slot (2 slot -> 1 slot) + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end l2fTemplate + +template_start f2lTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + psrldq xmm1, 8 ; clear xmm1 by shifting right + psrldq xmm2, 8 ; clear xmm2 by shifting right + movss xmm0, [r10] ; copy top value of stack in xmm0 + mov r11, 9223372036854775807 ; store max long in r11 + mov r12, -9223372036854775808 ; store min long in r12 + mov rcx, 0 ; store 0 in rcx + cvtsi2ss xmm1, r11 ; convert max long to float + cvtsi2ss xmm2, r12 ; convert min long to float + cvttss2si rax, xmm0 ; convert float to long with truncation + ucomiss xmm0, xmm1 ; compare value with max long + cmovae rax, r11 ; if value is above or equal to max long, store max long in rax + ucomiss xmm0, xmm2 ; compare value with min int + cmovb rax, r12 ; if value is below min long, store min long in rax + cmovp rax, rcx ; if value is NaN, store 0 in rax + push_single_slot ; add extra slot for long (1 slot -> 2 slot) + _64bit_slot_stack_from_rXX rax,0 ; store result back on stack +template_end f2lTemplate + +template_start d2fTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + movsd xmm0, [r10] ; copy top value of stack in xmm0 + cvtsd2ss xmm0, xmm0 ; convert double to float and store in xmm0 + pop_single_slot ; remove extra slot from double (2 slot -> 1 slot) + movss [r10], xmm0 ; store the result in xmm0 on the java stack +template_end d2fTemplate + +template_start f2dTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + movss xmm0, [r10] ; copy top value of stack in xmm0 + cvtss2sd xmm0, xmm0 ; convert float to 
double and store in xmm0 + push_single_slot ; add slot to stack for double (1 slot -> 2 slot) + movsd [r10], xmm0 ; store the result in xmm0 on the java stack +template_end f2dTemplate + +template_start eaxReturnTemplate + xor rax, rax ; clear rax + _32bit_slot_stack_to_eXX ax,0 ; move the stack top into eax register +template_end eaxReturnTemplate + +template_start raxReturnTemplate + _64bit_slot_stack_to_rXX rax,0 ; move the stack top into rax register +template_end raxReturnTemplate + +template_start xmm0ReturnTemplate + psrldq xmm0, 8 ; clear xmm0 by shifting right + movd xmm0, [r10] ; move the stack top into xmm0 register +template_end xmm0ReturnTemplate + +template_start retTemplate_add + add r10, byte 0xFF ; decrease stack size +template_end retTemplate_add + +template_start vReturnTemplate + ret ; return from the JITed method +template_end vReturnTemplate + +; Stack before: ..., value1, value2 -> +; Stack after: ..., result +; if value1 > value2, result = 1 +; if value1 == value 2, result = 0 +; if value1 < value 2, result = -1 +template_start lcmpTemplate + _64bit_slot_stack_to_rXX r11,0 ; grab value2 from the stack + pop_dual_slot ; reduce the stack size by 2 slots (16 bytes) + _64bit_slot_stack_to_rXX r12,0 ; grab value1 from the stack + pop_single_slot ; reduce long slot to int slot + xor ecx, ecx ; clear ecx (store 0) + mov dword edx, -1 ; store -1 in edx + mov dword ebx, 1 ; store 1 in ebx + cmp r12, r11 ; compare the two values + cmovg eax, ebx ; mov 1 into eax if ZF = 0 and CF = 0 + cmovl eax, edx ; mov -1 into eax if CF = 1 + cmovz eax, ecx ; mov 0 into eax if ZF = 1 + _32bit_slot_stack_from_eXX ax,0 ; store result on top of stack +template_end lcmpTemplate + +; Stack before: ..., value1, value2 -> +; Stack after: ..., result +; if value1 > value2, result = 1 Flags: ZF,PF,CF --> 000 +; else if value1 == value 2, result = 0 Flags: ZF,PF,CF --> 100 +; else if value1 < value 2, result = -1 Flags: ZF,PF,CF --> 001 +; else, one of value1 or value2 must be NaN -> result = -1 Flags: ZF,PF,CF --> 111 +template_start fcmplTemplate + movss xmm0, dword [r10] ; copy value2 into xmm0 + pop_single_slot ; decrease the stack by 1 slot + movss xmm1, dword [r10] ; copy value1 into xmm1 + xor r11d, r11d ; clear r11 (store 0) + mov dword eax, 1 ; store 1 in rax + mov dword ecx, -1 ; store -1 in rcx + comiss xmm1, xmm0 ; compare the values + cmove r12d, r11d ; mov 0 into r12 if ZF = 1 + cmova r12d, eax ; mov 1 into r12 if ZF = 0 and CF = 0 + cmovb r12d, ecx ; mov -1 into r12 if CF = 1 + cmovp r12d, ecx ; mov -1 into r12 if PF = 1 + _32bit_slot_stack_from_rXX r12,0 ; store result on top of stack +template_end fcmplTemplate + +; Stack before: ..., value1, value2 -> +; Stack after: ..., result +; if value1 > value2, result = 1 Flags: ZF,PF,CF --> 000 +; else if value1 == value 2, result = 0 Flags: ZF,PF,CF --> 100 +; else if value1 < value 2, result = -1 Flags: ZF,PF,CF --> 001 +; else, one of value1 or value2 must be NaN -> result = 1 Flags: ZF,PF,CF --> 111 +template_start fcmpgTemplate + movss xmm0, dword [r10] ; copy value2 into xmm0 + pop_single_slot ; decrease the stack by 1 slot + movss xmm1, dword [r10] ; copy value1 into xmm1 + xor r11d, r11d ; clear r11 (store 0) + mov dword eax, 1 ; store 1 in rax + mov dword ecx, -1 ; store -1 in rcx + comiss xmm1, xmm0 ; compare the values + cmove r12d, r11d ; mov 0 into r12 if ZF = 1 + cmovb r12d, ecx ; mov -1 into r12 if CF = 1 + cmova r12d, eax ; mov 1 into r12 if ZF = 0 and CF = 0 + cmovp r12d, eax ; mov 1 into r12 if PF = 1 + 
_32bit_slot_stack_from_rXX r12,0 ; store result on top of stack +template_end fcmpgTemplate + +; Stack before: ..., value1, value2 -> +; Stack after: ..., result +; if value1 > value2, result = 1 Flags: ZF,PF,CF --> 000 +; else if value1 == value 2, result = 0 Flags: ZF,PF,CF --> 100 +; else if value1 < value 2, result = -1 Flags: ZF,PF,CF --> 001 +; else, one of value1 or value2 must be NaN -> result = -1 Flags: ZF,PF,CF --> 111 +template_start dcmplTemplate + movsd xmm0, qword [r10] ; copy value2 into xmm0 + pop_dual_slot ; decrease the stack by 2 slots + movsd xmm1, qword [r10] ; copy value1 into xmm1 + pop_single_slot ; decrease the stack by 1 slot + xor r11d, r11d ; clear r11 (store 0) + mov dword eax, 1 ; store 1 in rax + mov dword ecx, -1 ; store -1 in rcx + comisd xmm1, xmm0 ; compare the values + cmove r12d, r11d ; mov 0 into r12 if ZF = 1 + cmova r12d, eax ; mov 1 into r12 if ZF = 0 and CF = 0 + cmovb r12d, ecx ; mov -1 into r12 if CF = 1 + cmovp r12d, ecx ; mov -1 into r12 if PF = 1 + _32bit_slot_stack_from_rXX r12,0 ; store result on top of stack +template_end dcmplTemplate + +; Stack before: ..., value1, value2 -> +; Stack after: ..., result +; if value1 > value2, result = 1 Flags: ZF,PF,CF --> 000 +; else if value1 == value 2, result = 0 Flags: ZF,PF,CF --> 100 +; else if value1 < value 2, result = -1 Flags: ZF,PF,CF --> 001 +; else, one of value1 or value2 must be NaN -> result = 1 Flags: ZF,PF,CF --> 111 +template_start dcmpgTemplate + movsd xmm0, qword [r10] ; copy value2 into xmm0 + pop_dual_slot ; decrease the stack by 1 slot + movsd xmm1, qword [r10] ; copy value1 into xmm1 + pop_single_slot ; decrease the stack by 1 slot + xor r11d, r11d ; clear r11 (store 0) + mov dword eax, 1 ; store 1 in rax + mov dword ecx, -1 ; store -1 in rcx + comisd xmm1, xmm0 ; compare the values + cmove r12d, r11d ; mov 0 into r12 if ZF = 1 + cmovb r12d, ecx ; mov -1 into r12 if CF = 1 + cmova r12d, eax ; mov 1 into r12 if ZF = 0 and CF = 0 + cmovp r12d, eax ; mov 1 into r12 if PF = 1 + _32bit_slot_stack_from_rXX r12,0 ; store result on top of stack +template_end dcmpgTemplate + +; ifne succeeds if and only if value ≠ 0 +; Stack before: ..., value -> +; Stack after: ... +template_start ifneTemplate + _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + test r11d, r11d ; r11 - r11 + jnz 0xefbeadde ; Used for generating jump not equal to far labels +template_end ifneTemplate + +; ifeq succeeds if and only if value = 0 +; Stack before: ..., value -> +; Stack after: ... +template_start ifeqTemplate + _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + test r11d, r11d ; r11 - r11 + jz 0xefbeadde ; Used for generating jump to far labels if ZF = 1 +template_end ifeqTemplate + +; iflt succeeds if and only if value < 0 +; Stack before: ..., value -> +; Stack after: ... +template_start ifltTemplate + _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator + pop_single_slot ; reduce the stack size by 1 slot (8 bytes) + test r11d, r11d ; r11 - r11 + jl 0xefbeadde ; Used for generating jump to far labels if SF <> OF +template_end ifltTemplate + +; ifge succeeds if and only if value >= 0 +; Stack before: ..., value -> +; Stack after: ... 
+template_start ifgeTemplate
+ _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ test r11d, r11d ; r11 - r11
+ jge 0xefbeadde ; Used for generating jump to far labels if SF = OF
+template_end ifgeTemplate
+
+; ifgt succeeds if and only if value > 0
+; Stack before: ..., value ->
+; Stack after: ...
+template_start ifgtTemplate
+ _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ test r11d, r11d ; r11 - r11
+ jg 0xefbeadde ; Used for generating jump to far labels if ZF = 0 and SF = OF
+template_end ifgtTemplate
+
+; ifle succeeds if and only if value <= 0
+; Stack before: ..., value ->
+; Stack after: ...
+template_start ifleTemplate
+ _32bit_slot_stack_to_rXX r11,0 ; pop first value off java stack into the accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ test r11d, r11d ; r11 - r11
+ jle 0xefbeadde ; Used for generating jump to far labels if ZF = 1 or SF <> OF
+template_end ifleTemplate
+
+; if_icmpeq succeeds if and only if value1 = value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ificmpeqTemplate
+ _32bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _32bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into the accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11d, r12d ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ je 0xefbeadde ; Used for generating jump if equal to far labels
+template_end ificmpeqTemplate
+
+template_start ifacmpeqTemplate
+ _64bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _64bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11, r12 ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ je 0xefbeadde ; Used for generating jump if equal to far labels
+template_end ifacmpeqTemplate
+
+; if_icmpne succeeds if and only if value1 ≠ value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ificmpneTemplate
+ _32bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _32bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11d, r12d ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ jne 0xefbeadde ; Used for generating jump if not equal to far labels
+template_end ificmpneTemplate
+
+; if_acmpne succeeds if and only if value1 ≠ value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ifacmpneTemplate
+ _64bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _64bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11, r12 ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ jne 0xefbeadde ; Used for generating jump if not equal to far labels
+template_end ifacmpneTemplate
+
+; if_icmplt succeeds if and only if value1 < value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ificmpltTemplate
+ _32bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _32bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11d, r12d ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ jl 0xefbeadde ; Used for generating jump if less than to far labels
+template_end ificmpltTemplate
+
+; if_icmple succeeds if and only if value1 ≤ value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ificmpleTemplate
+ _32bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _32bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11d, r12d ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ jle 0xefbeadde ; Used for generating jump if less than or equal to far labels
+template_end ificmpleTemplate
+
+; if_icmpge succeeds if and only if value1 ≥ value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ificmpgeTemplate
+ _32bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _32bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11d, r12d ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ jge 0xefbeadde ; Used for generating jump if greater than or equal to far labels
+template_end ificmpgeTemplate
+
+; if_icmpgt succeeds if and only if value1 > value2
+; Stack before: ..., value1, value2 ->
+; Stack after: ...
+template_start ificmpgtTemplate
+ _32bit_slot_stack_to_rXX r12,0 ; pop first value off java stack into the value register
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ _32bit_slot_stack_to_rXX r11,0 ; pop second value off java stack into accumulator
+ pop_single_slot ; reduce the stack size by 1 slot (8 bytes)
+ cmp r11d, r12d ; r11 (minuend) - r12 (subtrahend) and set EFLAGS
+ jg 0xefbeadde ; Used for generating jump if greater than to far labels
+template_end ificmpgtTemplate
+
+template_start gotoTemplate
+ jmp 0xefbeadde ; Used for generating unconditional jumps to far labels
+template_end gotoTemplate
+
+; The bytecodes below are untested
+
+; if_null succeeds if and only if value == 0
+; Stack before: ..., value ->
+; Stack after: ...
+template_start ifnullTemplate + _64bit_slot_stack_to_rXX rax,0 ; grab the reference from the operand stack + pop_single_slot ; reduce the mjit stack by one slot + cmp rax, 0 ; compare against 0 (null) + je 0xefbeadde ; jump if rax is null +template_end ifnullTemplate + +; if_nonnull succeeds if and only if value != 0 +; Stack before: ..., value -> +; Stack after: ... +template_start ifnonnullTemplate + _64bit_slot_stack_to_rXX rax,0 ; grab the reference from the operand stack + pop_single_slot ; reduce the mjit stack by one slot + cmp rax, 0 ; compare against 0 (null) + jne 0xefbeadde ; jump if rax is not null +template_end ifnonnullTemplate diff --git a/runtime/compiler/microjit/x/amd64/templates/linkage.nasm b/runtime/compiler/microjit/x/amd64/templates/linkage.nasm new file mode 100644 index 00000000000..448418e2fb9 --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/templates/linkage.nasm @@ -0,0 +1,488 @@ +; Copyright (c) 2022, 2022 IBM Corp. and others +; +; This program and the accompanying materials are made available under +; the terms of the Eclipse Public License 2.0 which accompanies this +; distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +; or the Apache License, Version 2.0 which accompanies this distribution and +; is available at https://www.apache.org/licenses/LICENSE-2.0. +; +; This Source Code may also be made available under the following +; Secondary Licenses when the conditions for such availability set +; forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +; General Public License, version 2 with the GNU Classpath +; Exception [1] and GNU General Public License, version 2 with the +; OpenJDK Assembly Exception [2]. +; +; [1] https://www.gnu.org/software/classpath/license.html +; [2] http://openjdk.java.net/legal/assembly-exception.html +; +; SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception +%include "utils.nasm" + +; +; Create templates +; + +section .text + +template_start saveXMM0Local + movq [r14 + 0xefbeadde], xmm0 +template_end saveXMM0Local + +template_start saveXMM1Local + movq [r14 + 0xefbeadde], xmm1 +template_end saveXMM1Local + +template_start saveXMM2Local + movq [r14 + 0xefbeadde], xmm2 +template_end saveXMM2Local + +template_start saveXMM3Local + movq [r14 + 0xefbeadde], xmm3 +template_end saveXMM3Local + +template_start saveXMM4Local + movq [r14 + 0xefbeadde], xmm4 +template_end saveXMM4Local + +template_start saveXMM5Local + movq [r14 + 0xefbeadde], xmm5 +template_end saveXMM5Local + +template_start saveXMM6Local + movq [r14 + 0xefbeadde], xmm6 +template_end saveXMM6Local + +template_start saveXMM7Local + movq [r14 + 0xefbeadde], xmm7 +template_end saveXMM7Local + +template_start saveRAXLocal + mov [r14 + 0xefbeadde], rax +template_end saveRAXLocal + +template_start saveRSILocal + mov [r14 + 0xefbeadde], rsi +template_end saveRSILocal + +template_start saveRDXLocal + mov [r14 + 0xefbeadde], rdx +template_end saveRDXLocal + +template_start saveRCXLocal + mov [r14 + 0xefbeadde], rcx +template_end saveRCXLocal + +template_start saveR11Local + mov [r14 + 0xefbeadde], r11 +template_end saveR11Local + +template_start movRSPOffsetR11 + mov r11, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end movRSPOffsetR11 + +template_start movRSPR10 + mov r10, rsp ; Used for allocating space on the stack. +template_end movRSPR10 + +template_start movR10R14 + mov r14, r10 ; Used for allocating space on the stack. 
+template_end movR10R14 + +template_start subR10Imm4 + sub r10, 0x7fffffff ; Used for allocating space on the stack. +template_end subR10Imm4 + +template_start addR10Imm4 + add r10, 0x7fffffff ; Used for allocating space on the stack. +template_end addR10Imm4 + +template_start subR14Imm4 + sub r14, 0x7fffffff ; Used for allocating space on the stack. +template_end subR14Imm4 + +template_start addR14Imm4 + add r14, 0x7fffffff ; Used for allocating space on the stack. +template_end addR14Imm4 + +template_start saveRBPOffset + mov [rsp+0xff], rbp ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRBPOffset + +template_start saveEBPOffset + mov [rsp+0xff], ebp ; Used ff as all other labels look valid and 255 will be rare. +template_end saveEBPOffset + +template_start saveRSPOffset + mov [rsp+0xff], rsp ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRSPOffset + +template_start saveESPOffset + mov [rsp+0xff], rsp ; Used ff as all other labels look valid and 255 will be rare. +template_end saveESPOffset + +template_start loadRBPOffset + mov rbp, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRBPOffset + +template_start loadEBPOffset + mov ebp, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadEBPOffset + +template_start loadRSPOffset + mov rsp, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRSPOffset + +template_start loadESPOffset + mov rsp, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadESPOffset + +template_start saveEAXOffset + mov [rsp+0xff], eax ; Used ff as all other labels look valid and 255 will be rare. +template_end saveEAXOffset + +template_start saveESIOffset + mov [rsp+0xff], esi ; Used ff as all other labels look valid and 255 will be rare. +template_end saveESIOffset + +template_start saveEDXOffset + mov [rsp+0xff], edx ; Used ff as all other labels look valid and 255 will be rare. +template_end saveEDXOffset + +template_start saveECXOffset + mov [rsp+0xff], ecx ; Used ff as all other labels look valid and 255 will be rare. +template_end saveECXOffset + +template_start saveEBXOffset + mov [rsp+0xff], ebx ; Used ff as all other labels look valid and 255 will be rare. +template_end saveEBXOffset + +template_start loadEAXOffset + mov eax, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadEAXOffset + +template_start loadESIOffset + mov esi, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadESIOffset + +template_start loadEDXOffset + mov edx, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadEDXOffset + +template_start loadECXOffset + mov ecx, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadECXOffset + +template_start loadEBXOffset + mov ebx, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadEBXOffset + +template_start addRSPImm8 + add rsp, 0x7fffffffffffffff ; Used for allocating space on the stack. +template_end addRSPImm8 + +template_start addRSPImm4 + add rsp, 0x7fffffff ; Used for allocating space on the stack. +template_end addRSPImm4 + +template_start addRSPImm2 + add rsp, 0x7fff ; Used for allocating space on the stack. +template_end addRSPImm2 + +template_start addRSPImm1 + add rsp, 0x7f ; Used for allocating space on the stack. 
+template_end addRSPImm1 + +template_start je4ByteRel + je 0xff; +template_end je4ByteRel + +template_start jeByteRel + je 0xff; +template_end jeByteRel + +template_start subRSPImm8 + sub rsp, 0x7fffffffffffffff ; Used for allocating space on the stack. +template_end subRSPImm8 + +template_start subRSPImm4 + sub rsp, 0x7fffffff ; Used for allocating space on the stack. +template_end subRSPImm4 + +template_start subRSPImm2 + sub rsp, 0x7fff ; Used for allocating space on the stack. +template_end subRSPImm2 + +template_start subRSPImm1 + sub rsp, 0x7f ; Used for allocating space on the stack. +template_end subRSPImm1 + +template_start jbe4ByteRel + jbe 0xefbeadde ; Used for generating jumps to far labels. +template_end jbe4ByteRel + +template_start cmpRspRbpDerefOffset + cmp rsp, [rbp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end cmpRspRbpDerefOffset + +template_start loadRAXOffset + mov rax, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRAXOffset + +template_start loadRSIOffset + mov rsi, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRSIOffset + +template_start loadRDXOffset + mov rdx, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRDXOffset + +template_start loadRCXOffset + mov rcx, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRCXOffset + +template_start loadRBXOffset + mov rbx, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadRBXOffset + +template_start loadR9Offset + mov r9, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR9Offset + +template_start loadR10Offset + mov r10, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR10Offset + +template_start loadR11Offset + mov r11, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR11Offset + +template_start loadR12Offset + mov r12, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR12Offset + +template_start loadR13Offset + mov r13, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR13Offset + +template_start loadR14Offset + mov r14, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR14Offset + +template_start loadR15Offset + mov r15, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadR15Offset + +template_start loadXMM0Offset + movq xmm0, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM0Offset + +template_start loadXMM1Offset + movq xmm1, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM1Offset + +template_start loadXMM2Offset + movq xmm2, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM2Offset + +template_start loadXMM3Offset + movq xmm3, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM3Offset + +template_start loadXMM4Offset + movq xmm4, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM4Offset + +template_start loadXMM5Offset + movq xmm5, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. 
+template_end loadXMM5Offset + +template_start loadXMM6Offset + movq xmm6, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM6Offset + +template_start loadXMM7Offset + movq xmm7, [rsp+0xff] ; Used ff as all other labels look valid and 255 will be rare. +template_end loadXMM7Offset + +template_start saveRAXOffset + mov [rsp+0xff], rax ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRAXOffset + +template_start saveRSIOffset + mov [rsp+0xff], rsi ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRSIOffset + +template_start saveRDXOffset + mov [rsp+0xff], rdx ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRDXOffset + +template_start saveRCXOffset + mov [rsp+0xff], rcx ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRCXOffset + +template_start saveRBXOffset + mov [rsp+0xff], rbx ; Used ff as all other labels look valid and 255 will be rare. +template_end saveRBXOffset + +template_start saveR9Offset + mov [rsp+0xff], r9 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR9Offset + +template_start saveR10Offset + mov [rsp+0xff], r10 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR10Offset + +template_start saveR11Offset + mov [rsp+0xff], r11 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR11Offset + +template_start saveR12Offset + mov [rsp+0xff], r12 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR12Offset + +template_start saveR13Offset + mov [rsp+0xff], r13 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR13Offset + +template_start saveR14Offset + mov [rsp+0xff], r14 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR14Offset + +template_start saveR15Offset + mov [rsp+0xff], r15 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveR15Offset + +template_start saveXMM0Offset + movq [rsp+0xff], xmm0 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM0Offset + +template_start saveXMM1Offset + movq [rsp+0xff], xmm1 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM1Offset + +template_start saveXMM2Offset + movq [rsp+0xff], xmm2 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM2Offset + +template_start saveXMM3Offset + movq [rsp+0xff], xmm3 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM3Offset + +template_start saveXMM4Offset + movq [rsp+0xff], xmm4 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM4Offset + +template_start saveXMM5Offset + movq [rsp+0xff], xmm5 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM5Offset + +template_start saveXMM6Offset + movq [rsp+0xff], xmm6 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM6Offset + +template_start saveXMM7Offset + movq [rsp+0xff], xmm7 ; Used ff as all other labels look valid and 255 will be rare. +template_end saveXMM7Offset + +template_start call4ByteRel + call 0xefbeadde ; Used for generating jumps to far labels. +template_end call4ByteRel + +template_start callByteRel + call 0xff ; Used for generating jumps to nearby labels, used ff as all other labels look valid and 255 will be rare. 
+template_end callByteRel + +template_start jump4ByteRel + jmp 0xefbeadde ; Used for generating jumps to far labels. +template_end jump4ByteRel + +template_start jumpByteRel + jmp short $+0x02 ; Used for generating jumps to nearby labels, shows up as eb 00 in memory (easy to see). +template_end jumpByteRel + +template_start nopInstruction + nop ; Used for alignment, one byte wide. +template_end nopInstruction + +template_start movRDIImm64 + mov rdi, 0xefbeaddeefbeadde ; looks like 'deadbeef' in objdump. +template_end movRDIImm64 + +template_start movEDIImm32 + mov edi, 0xefbeadde ; looks like 'deadbeef' in objdump. +template_end movEDIImm32 + +template_start movRAXImm64 + mov rax, 0xefbeaddeefbeadde ; looks like 'deadbeef' in objdump. +template_end movRAXImm64 + +template_start movEAXImm32 + mov eax, 0xefbeadde ; looks like 'deadbeef' in objdump. +template_end movEAXImm32 + +template_start jumpRDI + jmp rdi ; Used for jumping to absolute addresses, store in RDI then generate this. +template_end jumpRDI + +template_start jumpRAX + jmp rax ; Used for jumping to absolute addresses, store in RAX then generate this. +template_end jumpRAX + +template_start paintRegister + mov rax, qword 0x0df0adde0df0adde +template_end paintRegister + +template_start paintLocal + mov [r14+0xefbeadde], rax +template_end paintLocal + +template_start moveCountAndRecompile + mov eax, [qword 0xefbeaddeefbeadde] +template_end moveCountAndRecompile + +template_start checkCountAndRecompile + test eax,eax + je 0xefbeadde +template_end checkCountAndRecompile + +template_start loadCounter + mov eax, [qword 0xefbeaddeefbeadde] +template_end loadCounter + +template_start decrementCounter + sub eax, 1 + mov [qword 0xefbeaddeefbeadde], eax +template_end decrementCounter + +template_start incrementCounter + inc rax + mov [qword 0xefbeaddeefbeadde], eax +template_end incrementCounter + +template_start jgCount + test eax, eax + jg 0xefbeadde +template_end jgCount + +template_start callRetranslateArg1 + mov rax, qword 0x0df0adde0df0adde +template_end callRetranslateArg1 + +template_start callRetranslateArg2 + mov rsi, qword 0x0df0adde0df0adde +template_end callRetranslateArg2 + +template_start callRetranslate + mov edi, 0x00080000 + call 0xefbeadde +template_end callRetranslate + +template_start setCounter + mov rax, 0x2710 + mov [qword 0xefbeaddeefbeadde], eax +template_end setCounter + +template_start jmpToBody + jmp 0xefbeadde +template_end jmpToBody diff --git a/runtime/compiler/microjit/x/amd64/templates/microjit.nasm b/runtime/compiler/microjit/x/amd64/templates/microjit.nasm new file mode 100644 index 00000000000..812c2a81b4f --- /dev/null +++ b/runtime/compiler/microjit/x/amd64/templates/microjit.nasm @@ -0,0 +1,94 @@ +; Copyright (c) 2022, 2022 IBM Corp. and others +; +; This program and the accompanying materials are made available under +; the terms of the Eclipse Public License 2.0 which accompanies this +; distribution and is available at https://www.eclipse.org/legal/epl-2.0/ +; or the Apache License, Version 2.0 which accompanies this distribution and +; is available at https://www.apache.org/licenses/LICENSE-2.0. +; +; This Source Code may also be made available under the following +; Secondary Licenses when the conditions for such availability set +; forth in the Eclipse Public License, v. 2.0 are satisfied: GNU +; General Public License, version 2 with the GNU Classpath +; Exception [1] and GNU General Public License, version 2 with the +; OpenJDK Assembly Exception [2]. 
+; +; [1] https://www.gnu.org/software/classpath/license.html +; [2] http://openjdk.java.net/legal/assembly-exception.html +; +; SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception + +;MicroJIT Virtual Machine to X86-64 mapping +; rsp: +; is the stack extent for the java value stack pointer. +; r10: +; will hold the java value stack pointer. +; r11: +; stores the accumulator, or +; stores a pointer to an object +; r12: +; stores any value which will act on the accumulator, or +; stores the value to be written to an object field, or +; stores the value read from an object field +; r13: +; holds addresses for absolute addressing +; r14: +; holds a pointer to the start of the local array +; r15: +; stores values loaded from memory for storing on the stack or in the local array + +%macro pop_single_slot 0 +add r10, 8 +%endmacro + +%macro pop_dual_slot 0 +add r10, 16 +%endmacro + +%macro push_single_slot 0 +sub r10, 8 +%endmacro + +%macro push_dual_slot 0 +sub r10, 16 +%endmacro + +%macro _32bit_local_to_rXX_PATCH 1 +mov %1d, dword [r14 + 0xefbeadde] +%endmacro + +%macro _64bit_local_to_rXX_PATCH 1 +mov %{1}, qword [r14 + 0xefbeadde] +%endmacro + +%macro _32bit_local_from_rXX_PATCH 1 +mov dword [r14 + 0xefbeadde], %1d +%endmacro + +%macro _64bit_local_from_rXX_PATCH 1 +mov qword [r14 + 0xefbeadde], %1 +%endmacro + +%macro _32bit_slot_stack_to_rXX 2 +mov %1d, dword [r10 + %2] +%endmacro + +%macro _32bit_slot_stack_to_eXX 2 +mov e%{1}, dword [r10 + %2] +%endmacro + +%macro _64bit_slot_stack_to_rXX 2 +mov %{1}, qword [r10 + %2] +%endmacro + +%macro _32bit_slot_stack_from_rXX 2 +mov dword [r10 + %2], %1d +%endmacro + +%macro _32bit_slot_stack_from_eXX 2 +mov dword [r10 + %2], e%1 +%endmacro + +%macro _64bit_slot_stack_from_rXX 2 +mov qword [r10 + %2], %1 +%endmacro diff --git a/runtime/compiler/optimizer/J9Optimizer.cpp b/runtime/compiler/optimizer/J9Optimizer.cpp index 78b690a26cb..0496b1fdea3 100644 --- a/runtime/compiler/optimizer/J9Optimizer.cpp +++ b/runtime/compiler/optimizer/J9Optimizer.cpp @@ -86,6 +86,13 @@ #include "optimizer/MethodHandleTransformer.hpp" #include "optimizer/VectorAPIExpansion.hpp" +#if defined(J9VM_OPT_MICROJIT) +static const OptimizationStrategy J9MicroJITOpts[] = + { + { OMR::jProfilingBlock, OMR::MustBeDone }, + { OMR::endOpts } + }; +#endif /* J9VM_OPT_MICROJIT */ static const OptimizationStrategy J9EarlyGlobalOpts[] = { @@ -1018,6 +1025,13 @@ J9::Optimizer::optimizationStrategy(TR::Compilation *c) } } +#if defined(J9VM_OPT_MICROJIT) +const OptimizationStrategy * +J9::Optimizer::microJITOptimizationStrategy(TR::Compilation *c) + { + return J9MicroJITOpts; + } +#endif /* J9VM_OPT_MICROJIT */ ValueNumberInfoBuildType J9::Optimizer::valueNumberInfoBuildType() diff --git a/runtime/compiler/optimizer/J9Optimizer.hpp b/runtime/compiler/optimizer/J9Optimizer.hpp index 600b62c27a3..1b5992252c9 100644 --- a/runtime/compiler/optimizer/J9Optimizer.hpp +++ b/runtime/compiler/optimizer/J9Optimizer.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2019 IBM Corp. and others + * Copyright (c) 2000, 2022 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -61,6 +61,9 @@ class Optimizer : public OMR::OptimizerConnector bool switchToProfiling(); static const OptimizationStrategy *optimizationStrategy(TR::Compilation *c); +#if defined(J9VM_OPT_MICROJIT) + static const OptimizationStrategy *microJITOptimizationStrategy(TR::Compilation *c); +#endif static ValueNumberInfoBuildType valueNumberInfoBuildType(); private: diff --git a/runtime/compiler/optimizer/JProfilingBlock.cpp b/runtime/compiler/optimizer/JProfilingBlock.cpp index 34947676d4b..bf75ab8c814 100644 --- a/runtime/compiler/optimizer/JProfilingBlock.cpp +++ b/runtime/compiler/optimizer/JProfilingBlock.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2021 IBM Corp. and others + * Copyright (c) 2000, 2022 IBM Corp. and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -35,6 +35,7 @@ #include "control/Recompilation.hpp" #include "control/RecompilationInfo.hpp" #include "optimizer/TransformUtil.hpp" +#include "env/j9method.h" // Global thresholds for the number of method enters required to trip // method recompilation - these are adjusted in the JIT hook control logic @@ -595,6 +596,12 @@ void TR_JProfilingBlock::computeMinimumSpanningTree(BlockParents &parent, BlockP BlockWeights weights(cfg->getNextNodeNumber(), stackMemoryRegion); TR::BlockChecklist inMST(comp()); + char *classname = comp()->getMethodBeingCompiled()->classNameChars(); + uint16_t classnamelength = comp()->getMethodBeingCompiled()->classNameLength(); + + char *name = comp()->getMethodBeingCompiled()->nameChars(); + uint16_t namelength = comp()->getMethodBeingCompiled()->nameLength(); + // Prim's init { TR::Block *first = optimizer()->getMethodSymbol()->getFirstTreeTop()->getNode()->getBlock(); @@ -602,6 +609,11 @@ void TR_JProfilingBlock::computeMinimumSpanningTree(BlockParents &parent, BlockP Q.push(std::make_pair(0,first)); } + if (trace()) + { + traceMsg(comp(), "class.method:from_block:to_block:frequency\n"); + } + while (!Q.empty()) { TR::Block *block = Q.top().second; @@ -609,14 +621,19 @@ void TR_JProfilingBlock::computeMinimumSpanningTree(BlockParents &parent, BlockP inMST.add(block); if (trace()) - traceMsg(comp(), "Add block_%d to the MST\n", block->getNumber()); + { + //traceMsg(comp(), "Add block_%d to the MST\n", block->getNumber()); + } AdjacentBlockIterator adj(comp(), block); while (adj.current()) { TR::Block *candidate = adj.current(); if (trace()) - traceMsg(comp(), " adj block_%d weight %d\n", candidate->getNumber(), adj.frequency()); + { + //traceMsg(comp(), " adj block_%d weight %d\n", candidate->getNumber(), adj.frequency()); + traceMsg(comp(), "%s.%s:%d:%d:%d\n", classname, name, block->getNumber(), candidate->getNumber(), adj.frequency()); + } if (!inMST.contains(candidate) && weights[candidate] > -adj.frequency()) { weights[candidate] = -adj.frequency(); @@ -963,6 +980,16 @@ int32_t TR_JProfilingBlock::perform() if (trace()) parent.dump(comp()); +// TODO: MicroJIT: Remove this when ready to implement counter insertion +#if defined(J9VM_OPT_MICROJIT) + J9Method *method = static_cast(comp()->getMethodBeingCompiled())->ramMethod(); + if (TR::Options::getJITCmdLineOptions()->_mjitEnabled && comp()->fej9()->isMJITExtendedFlagsMethod(method) && 
comp()->getMethodBeingCompiled()->isInterpreted()) + { + comp()->setSkippedJProfilingBlock(); + return 0; + } +#endif + TR::BlockChecklist countedBlocks(comp()); int32_t numEdges = processCFGForCounting(parent, countedBlocks, loopBack); diff --git a/runtime/compiler/runtime/MetaData.cpp b/runtime/compiler/runtime/MetaData.cpp index 3160aeb2507..eb38212d69c 100644 --- a/runtime/compiler/runtime/MetaData.cpp +++ b/runtime/compiler/runtime/MetaData.cpp @@ -59,6 +59,9 @@ #include "il/Symbol.hpp" #include "il/TreeTop.hpp" #include "il/TreeTop_inlines.hpp" +#if defined(J9VM_OPT_MICROJIT) +#include "microjit/ExceptionTable.hpp" +#endif /* J9VM_OPT_MICROJIT */ #include "runtime/ArtifactManager.hpp" #include "runtime/CodeCache.hpp" #include "runtime/MethodMetaData.h" @@ -227,6 +230,52 @@ createExceptionTable( } } +#if defined(J9VM_OPT_MICROJIT) +static void +createMJITExceptionTable( + TR_MethodMetaData *data, + MJIT::ExceptionTableEntryIterator & exceptionIterator, + bool fourByteOffsets, + TR::Compilation *comp) + { + uint8_t *cursor = (uint8_t *)data + sizeof(TR_MethodMetaData); + + for (TR_ExceptionTableEntry *e = exceptionIterator.getFirst(); e; e = exceptionIterator.getNext()) + { + if (fourByteOffsets) + { + *(uint32_t *)cursor = e->_instructionStartPC, cursor += 4; + *(uint32_t *)cursor = e->_instructionEndPC, cursor += 4; + *(uint32_t *)cursor = e->_instructionHandlerPC, cursor += 4; + *(uint32_t *)cursor = e->_catchType, cursor += 4; + if (comp->fej9()->isAOT_DEPRECATED_DO_NOT_USE()) + *(uintptr_t *)cursor = (uintptr_t)e->_byteCodeInfo.getCallerIndex(), cursor += sizeof(uintptr_t); + else + *(uintptr_t *)cursor = (uintptr_t)e->_method->resolvedMethodAddress(), cursor += sizeof(uintptr_t); + } + else + { + TR_ASSERT(e->_catchType <= 0xFFFF, "the cp index for the catch type requires 17 bits!"); + + *(uint16_t *)cursor = e->_instructionStartPC, cursor += 2; + *(uint16_t *)cursor = e->_instructionEndPC, cursor += 2; + *(uint16_t *)cursor = e->_instructionHandlerPC, cursor += 2; + *(uint16_t *)cursor = e->_catchType, cursor += 2; + } + + // Ensure that InstructionBoundaries are initialized properly. 
+ // + TR_ASSERT(e->_instructionStartPC != UINT_MAX, "Uninitialized startPC in exception table entry: %p", e); + TR_ASSERT(e->_instructionEndPC != UINT_MAX, "Uninitialized endPC in exception table entry: %p", e); + + if (comp->getOption(TR_FullSpeedDebug) && !debug("dontEmitFSDInfo")) + { + *(uint32_t *)cursor = e->_byteCodeInfo.getByteCodeIndex(); + cursor += 4; + } + } + } +#endif /* J9VM_OPT_MICROJIT */ // This method is used to calculate the size (in number of bytes) // that this internal pointer map will require in the J9 GC map @@ -1087,6 +1136,51 @@ static int32_t calculateExceptionsSize( return exceptionsSize; } +#if defined(J9VM_OPT_MICROJIT) +static int32_t +calculateMJITExceptionsSize( + TR::Compilation *comp, + MJIT::ExceptionTableEntryIterator& exceptionIterator, + bool& fourByteExceptionTableEntries, + uint32_t& numberOfExceptionRangesWithBits) + { + uint32_t exceptionsSize = 0; + uint32_t numberOfExceptionRanges = exceptionIterator.size(); + numberOfExceptionRangesWithBits = numberOfExceptionRanges; + if (numberOfExceptionRanges) + { + if (numberOfExceptionRanges > 0x3FFF) + return -1; // our meta data representation only has 14 bits for the number of exception ranges + + if (!fourByteExceptionTableEntries) + for (TR_ExceptionTableEntry *e = exceptionIterator.getFirst(); e; e = exceptionIterator.getNext()) + if (e->_catchType > 0xFFFF || !e->_method->isSameMethod(comp->getCurrentMethod())) + { + fourByteExceptionTableEntries = true; + break; + } + + int32_t entrySize; + if (fourByteExceptionTableEntries) + { + entrySize = sizeof(J9JIT32BitExceptionTableEntry); + numberOfExceptionRangesWithBits |= 0x8000; + } + else + entrySize = sizeof(J9JIT16BitExceptionTableEntry); + + if (comp->getOption(TR_FullSpeedDebug)) + { + numberOfExceptionRangesWithBits |= 0x4000; + entrySize += 4; + } + + exceptionsSize = numberOfExceptionRanges * entrySize; + } + return exceptionsSize; + } +#endif /* J9VM_OPT_MICROJIT */ + static void populateBodyInfo( TR::Compilation *comp, @@ -1749,3 +1843,180 @@ createMethodMetaData( return data; } + +#if defined(J9VM_OPT_MICROJIT) +// --------------------------------------------------------------------------- +// The routine that sequences the creation of the meta-data for +// a method compiled for MJIT +TR_MethodMetaData * +createMJITMethodMetaData( + TR_J9VMBase & vmArg, + TR_ResolvedMethod *vmMethod, + TR::Compilation *comp) + { + // --------------------------------------------------------------------------- + // This function needs to create the meta-data structures for: + // GC maps, Exceptions, inlining calls, stack atlas, + // GPU optimizations and internal pointer table + TR_J9VMBase *vm = &vmArg; + MJIT::ExceptionTableEntryIterator exceptionIterator(comp); + TR::ResolvedMethodSymbol *methodSymbol = comp->getJittedMethodSymbol(); + TR::CodeGenerator *cg = comp->cg(); + TR::GCStackAtlas *trStackAtlas = cg->getStackAtlas(); + + // --------------------------------------------------------------------------- + // Find unmergeable GC maps + // TODO: MicroJIT: Create mechanism like treetop to support this at a MicroJIT level + GCStackMapSet nonmergeableBCI(std::less(), cg->trMemory()->heapMemoryRegion()); + // if (comp->getOptions()->getReportByteCodeInfoAtCatchBlock()) + // { + // for (TR::TreeTop* treetop = comp->getStartTree(); treetop; treetop = treetop->getNextTreeTop()) + // { + // if (treetop->getNode()->getOpCodeValue() == TR::BBStart) + // { + // TR::Block* block = treetop->getNode()->getBlock(); + // if (block->getCatchBlockExtension()) + // { + 
// nonmergeableBCI.insert(block->getFirstInstruction()->getGCMap()); + // } + // } + // } + // } + + bool fourByteOffsets = RANGE_NEEDS_FOUR_BYTE_OFFSET(cg->getCodeLength()); + + uint32_t tableSize = sizeof(TR_MethodMetaData); + + uint32_t numberOfSlotsMapped = trStackAtlas->getNumberOfSlotsMapped(); + uint32_t numberOfMapBytes = (numberOfSlotsMapped + 1 + 7) >> 3; // + 1 to indicate whether there's a live monitor map + if (comp->isAlignStackMaps()) + { + numberOfMapBytes = (numberOfMapBytes + 3) & ~3; + } + + // -------------------------------------------------------------------------- + // Computing the size of the exception table + // fourByteExceptionTableEntries and numberOfExceptionRangesWithBits will be + // computed in calculateExceptionSize + // + bool fourByteExceptionTableEntries = fourByteOffsets; + uint32_t numberOfExceptionRangesWithBits = 0; + int32_t exceptionsSize = calculateMJITExceptionsSize(comp, exceptionIterator, fourByteExceptionTableEntries, numberOfExceptionRangesWithBits); + + if (exceptionsSize == -1) + { + return NULL; + } + // TODO: MicroJIT: once exception support is added to MicroJIT uncomment this line + // tableSize += exceptionsSize; + uint32_t exceptionTableSize = tableSize; // Size of the meta data header and exception entries + + // -------------------------------------------------------------------------- + // Computing the size of the inlinedCall + // + + // TODO: MicroJIT: Do we care about inline calls? + uint32_t inlinedCallSize = 0; + + // Add size of stack atlas to allocate + // + int32_t sizeOfStackAtlasInBytes = calculateSizeOfStackAtlas(vm, cg, fourByteOffsets, numberOfSlotsMapped, numberOfMapBytes, comp, nonmergeableBCI); + + tableSize += sizeOfStackAtlasInBytes; + + // Add size of internal ptr data structure to allocate + // + tableSize += calculateSizeOfInternalPtrMap(comp); + + // Escape analysis change for compressed pointers + // + TR_GCStackAllocMap *stackAllocMap = trStackAtlas->getStackAllocMap(); + if (stackAllocMap) + { + tableSize += numberOfMapBytes + sizeof(uintptr_t); + } + + // Internal pointer map + // + + /* Legend of the info stored at "data". 
From top to bottom, the address increases + Exception Info + -------------- + Inline Info + -------------- + Stack Atlas + -------------- + Stack alloc map (if exists) + -------------- + internal pointer map + */ + + TR_MethodMetaData *data = (TR_MethodMetaData*) vmMethod->allocateException(tableSize, comp); + + populateBodyInfo(comp, vm, data); + + data->startPC = (UDATA)comp->cg()->getCodeStart(); + data->endPC = (UDATA)comp->cg()->getCodeEnd(); + data->startColdPC = (UDATA)0; + data->endWarmPC = data->endPC; + data->codeCacheAlloc = (UDATA)comp->cg()->getBinaryBufferStart(); + data->flags |= JIT_METADATA_GC_MAP_32_BIT_OFFSETS; + + data->hotness = comp->getMethodHotness(); + data->totalFrameSize = comp->cg()->getFrameSizeInBytes()/TR::Compiler->om.sizeofReferenceAddress(); + data->slots = vmMethod->numberOfParameterSlots(); + data->scalarTempSlots = methodSymbol->getScalarTempSlots(); + data->objectTempSlots = methodSymbol->getObjectTempSlots(); + data->prologuePushes = methodSymbol->getProloguePushSlots(); + data->numExcptionRanges = numberOfExceptionRangesWithBits; + data->tempOffset = comp->cg()->getStackAtlas()->getNumberOfPendingPushSlots(); + data->size = tableSize; + + data->gcStackAtlas = createStackAtlas(vm, comp->cg(), fourByteOffsets, numberOfSlotsMapped, numberOfMapBytes, comp, ((uint8_t *)data + exceptionTableSize + inlinedCallSize), sizeOfStackAtlasInBytes, nonmergeableBCI); + + createMJITExceptionTable(data, exceptionIterator, fourByteExceptionTableEntries, comp); + + data->registerSaveDescription = comp->cg()->getRegisterSaveDescription(); + + data->inlinedCalls = NULL; + data->riData = NULL; + + if (!(vm->_jitConfig->runtimeFlags & J9JIT_TOSS_CODE) && !vm->isAOT_DEPRECATED_DO_NOT_USE()) + { + TR_TranslationArtifactManager *artifactManager = TR_TranslationArtifactManager::getGlobalArtifactManager(); + TR_TranslationArtifactManager::CriticalSection updateMetaData; + + if ( !(artifactManager->insertArtifact( static_cast(data) ) ) ) + { + // Insert trace point here for insertion failure + } + if (vm->isAnonymousClass(((TR_ResolvedJ9Method*)vmMethod)->romClassPtr())) + { + J9Class *j9clazz = ((TR_ResolvedJ9Method*)vmMethod)->constantPoolHdr(); + J9CLASS_EXTENDED_FLAGS_SET(j9clazz, J9ClassContainsJittedMethods); + data->prevMethod = NULL; + data->nextMethod = j9clazz->jitMetaDataList; + if (j9clazz->jitMetaDataList) + j9clazz->jitMetaDataList->prevMethod = data; + j9clazz->jitMetaDataList = data; + } + else + { + J9ClassLoader *classLoader = ((TR_ResolvedJ9Method*)vmMethod)->getClassLoader(); + classLoader->flags |= J9CLASSLOADER_CONTAINS_JITTED_METHODS; + data->prevMethod = NULL; + data->nextMethod = classLoader->jitMetaDataList; + if (classLoader->jitMetaDataList) + classLoader->jitMetaDataList->prevMethod = data; + classLoader->jitMetaDataList = data; + } + } + + if (comp->getOption(TR_TraceCG) && comp->getOutFile() != NULL) + { + comp->getDebug()->print(data, vmMethod, fourByteOffsets); + } + + return data; + } +#endif /* J9VM_OPT_MICROJIT */ diff --git a/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp b/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp index d75b6187ab4..04ec1ed9aa7 100644 --- a/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp +++ b/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp @@ -87,13 +87,20 @@ enum J9::X86::AMD64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg) : J9::X86::PrivateLinkage(cg) { + setOffsetToFirstParm(RETURN_ADDRESS_SIZE); + setProperties(cg,&_properties); + } + +void 
diff --git a/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp b/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp
index d75b6187ab4..04ec1ed9aa7 100644
--- a/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp
+++ b/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.cpp
@@ -87,13 +87,20 @@ enum
 J9::X86::AMD64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg)
    : J9::X86::PrivateLinkage(cg)
    {
+   setOffsetToFirstParm(RETURN_ADDRESS_SIZE);
+   setProperties(cg,&_properties);
+   }
+
+void
+J9::X86::AMD64::setProperties(
+   TR::CodeGenerator *cg,
+   TR::X86LinkageProperties *properties)
+   {
    TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
    const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
    uint8_t r, p;
    TR::RealRegister::RegNum metaReg = TR::RealRegister::ebp;
-
-   _properties._properties =
+   properties->_properties =
       EightBytePointers | EightByteParmSlots
       | IntegersInRegisters | LongsInRegisters | FloatsInRegisters
       | NeedsThunksForIndirectCalls
@@ -101,98 +108,97 @@ J9::X86::AMD64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg)
       ;
 
    if (!fej9->pushesOutgoingArgsAtCallSite(cg->comp()))
-      _properties._properties |= CallerCleanup | ReservesOutgoingArgsInPrologue;
+      properties->_properties |= CallerCleanup | ReservesOutgoingArgsInPrologue;
 
    // Integer arguments
    p=0;
-   _properties._firstIntegerArgumentRegister = p;
-   _properties._argumentRegisters[p++] = TR::RealRegister::eax;
-   _properties._argumentRegisters[p++] = TR::RealRegister::esi;
-   _properties._argumentRegisters[p++] = TR::RealRegister::edx;
-   _properties._argumentRegisters[p++] = TR::RealRegister::ecx;
+   properties->_firstIntegerArgumentRegister = p;
+   properties->_argumentRegisters[p++] = TR::RealRegister::eax;
+   properties->_argumentRegisters[p++] = TR::RealRegister::esi;
+   properties->_argumentRegisters[p++] = TR::RealRegister::edx;
+   properties->_argumentRegisters[p++] = TR::RealRegister::ecx;
    TR_ASSERT(p == NUM_INTEGER_LINKAGE_REGS, "assertion failure");
-   _properties._numIntegerArgumentRegisters = NUM_INTEGER_LINKAGE_REGS;
+   properties->_numIntegerArgumentRegisters = NUM_INTEGER_LINKAGE_REGS;
 
    // Float arguments
-   _properties._firstFloatArgumentRegister = p;
+   properties->_firstFloatArgumentRegister = p;
    for(r=0; r<=7; r++)
-      _properties._argumentRegisters[p++] = TR::RealRegister::xmmIndex(r);
+      properties->_argumentRegisters[p++] = TR::RealRegister::xmmIndex(r);
    TR_ASSERT(p == NUM_INTEGER_LINKAGE_REGS + NUM_FLOAT_LINKAGE_REGS, "assertion failure");
-   _properties._numFloatArgumentRegisters = NUM_FLOAT_LINKAGE_REGS;
+   properties->_numFloatArgumentRegisters = NUM_FLOAT_LINKAGE_REGS;
 
    // Preserved
    p=0;
-   _properties._preservedRegisters[p++] = TR::RealRegister::ebx;
-   _properties._preservedRegisterMapForGC = TR::RealRegister::ebxMask;
+   properties->_preservedRegisters[p++] = TR::RealRegister::ebx;
+   properties->_preservedRegisterMapForGC = TR::RealRegister::ebxMask;
    int32_t lastPreservedRegister = 9; // changed to 9 for liberty, it used to be 15
    for (r=9; r<=lastPreservedRegister; r++)
       {
-      _properties._preservedRegisters[p++] = TR::RealRegister::rIndex(r);
-      _properties._preservedRegisterMapForGC |= TR::RealRegister::gprMask(TR::RealRegister::rIndex(r));
+      properties->_preservedRegisters[p++] = TR::RealRegister::rIndex(r);
+      properties->_preservedRegisterMapForGC |= TR::RealRegister::gprMask(TR::RealRegister::rIndex(r));
       }
-   _properties._numberOfPreservedGPRegisters = p;
+   properties->_numberOfPreservedGPRegisters = p;
 
    if (!INTERPRETER_CLOBBERS_XMMS)
       for (r=8; r<=15; r++)
          {
-         _properties._preservedRegisters[p++] = TR::RealRegister::xmmIndex(r);
-         _properties._preservedRegisterMapForGC |= TR::RealRegister::xmmrMask(TR::RealRegister::xmmIndex(r));
+         properties->_preservedRegisters[p++] = TR::RealRegister::xmmIndex(r);
+         properties->_preservedRegisterMapForGC |= TR::RealRegister::xmmrMask(TR::RealRegister::xmmIndex(r));
         }
 
-   _properties._numberOfPreservedXMMRegisters = p - _properties._numberOfPreservedGPRegisters;
-   _properties._maxRegistersPreservedInPrologue = p;
-   _properties._preservedRegisters[p++] = metaReg;
-   _properties._preservedRegisters[p++] = TR::RealRegister::esp;
-   _properties._numPreservedRegisters = p;
+   properties->_numberOfPreservedXMMRegisters = p - properties->_numberOfPreservedGPRegisters;
+   properties->_maxRegistersPreservedInPrologue = p;
+   properties->_preservedRegisters[p++] = metaReg;
+   properties->_preservedRegisters[p++] = TR::RealRegister::esp;
+   properties->_numPreservedRegisters = p;
 
    // Other
-   _properties._returnRegisters[0] = TR::RealRegister::eax;
-   _properties._returnRegisters[1] = TR::RealRegister::xmm0;
-   _properties._returnRegisters[2] = noReg;
+   properties->_returnRegisters[0] = TR::RealRegister::eax;
+   properties->_returnRegisters[1] = TR::RealRegister::xmm0;
+   properties->_returnRegisters[2] = noReg;
 
-   _properties._scratchRegisters[0] = TR::RealRegister::edi;
-   _properties._scratchRegisters[1] = TR::RealRegister::r8;
-   _properties._numScratchRegisters = 2;
+   properties->_scratchRegisters[0] = TR::RealRegister::edi;
+   properties->_scratchRegisters[1] = TR::RealRegister::r8;
+   properties->_numScratchRegisters = 2;
 
-   _properties._vtableIndexArgumentRegister = TR::RealRegister::r8;
-   _properties._j9methodArgumentRegister = TR::RealRegister::edi;
-   _properties._framePointerRegister = TR::RealRegister::esp;
-   _properties._methodMetaDataRegister = metaReg;
+   properties->_vtableIndexArgumentRegister = TR::RealRegister::r8;
+   properties->_j9methodArgumentRegister = TR::RealRegister::edi;
+   properties->_framePointerRegister = TR::RealRegister::esp;
+   properties->_methodMetaDataRegister = metaReg;
 
-   _properties._numberOfVolatileGPRegisters = 6; // rax, rsi, rdx, rcx, rdi, r8
-   _properties._numberOfVolatileXMMRegisters = INTERPRETER_CLOBBERS_XMMS? 16 : 8; // xmm0-xmm7
+   properties->_numberOfVolatileGPRegisters = 6; // rax, rsi, rdx, rcx, rdi, r8
+   properties->_numberOfVolatileXMMRegisters = INTERPRETER_CLOBBERS_XMMS? 16 : 8; // xmm0-xmm7
 
    // Offsets relative to where the frame pointer *would* point if we had one;
    // namely, the local with the highest address (ie. the "first" local)
-   setOffsetToFirstParm(RETURN_ADDRESS_SIZE);
-   _properties._offsetToFirstLocal = 0;
+   properties->_offsetToFirstLocal = 0;
 
    // TODO: Need a better way to build the flags so they match the info above
    //
-   memset(_properties._registerFlags, 0, sizeof(_properties._registerFlags));
+   memset(properties->_registerFlags, 0, sizeof(properties->_registerFlags));
 
    // Integer arguments/return
-   _properties._registerFlags[TR::RealRegister::eax] = IntegerArgument | IntegerReturn;
-   _properties._registerFlags[TR::RealRegister::esi] = IntegerArgument;
-   _properties._registerFlags[TR::RealRegister::edx] = IntegerArgument;
-   _properties._registerFlags[TR::RealRegister::ecx] = IntegerArgument;
+   properties->_registerFlags[TR::RealRegister::eax] = IntegerArgument | IntegerReturn;
+   properties->_registerFlags[TR::RealRegister::esi] = IntegerArgument;
+   properties->_registerFlags[TR::RealRegister::edx] = IntegerArgument;
+   properties->_registerFlags[TR::RealRegister::ecx] = IntegerArgument;
 
    // Float arguments/return
-   _properties._registerFlags[TR::RealRegister::xmm0] = FloatArgument | FloatReturn;
+   properties->_registerFlags[TR::RealRegister::xmm0] = FloatArgument | FloatReturn;
    for(r=1; r <= 7; r++)
-      _properties._registerFlags[TR::RealRegister::xmmIndex(r)] = FloatArgument;
+      properties->_registerFlags[TR::RealRegister::xmmIndex(r)] = FloatArgument;
 
    // Preserved
-   _properties._registerFlags[TR::RealRegister::ebx] = Preserved;
-   _properties._registerFlags[TR::RealRegister::esp] = Preserved;
-   _properties._registerFlags[metaReg] = Preserved;
+   properties->_registerFlags[TR::RealRegister::ebx] = Preserved;
+   properties->_registerFlags[TR::RealRegister::esp] = Preserved;
+   properties->_registerFlags[metaReg] = Preserved;
    for(r=9; r <= lastPreservedRegister; r++)
-      _properties._registerFlags[TR::RealRegister::rIndex(r)] = Preserved;
+      properties->_registerFlags[TR::RealRegister::rIndex(r)] = Preserved;
    if(!INTERPRETER_CLOBBERS_XMMS)
       for(r=8; r <= 15; r++)
-         _properties._registerFlags[TR::RealRegister::xmmIndex(r)] = Preserved;
+         properties->_registerFlags[TR::RealRegister::xmmIndex(r)] = Preserved;
 
    p = 0;
@@ -201,8 +207,8 @@ J9::X86::AMD64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg)
       // Volatiles that aren't linkage regs
       if (TR::Machine::numGPRRegsWithheld(cg) == 0)
          {
-         _properties._allocationOrder[p++] = TR::RealRegister::edi;
-         _properties._allocationOrder[p++] = TR::RealRegister::r8;
+         properties->_allocationOrder[p++] = TR::RealRegister::edi;
+         properties->_allocationOrder[p++] = TR::RealRegister::r8;
          }
       else
          {
@@ -210,20 +216,20 @@ J9::X86::AMD64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg)
         }
 
      // Linkage regs in reverse order
-      _properties._allocationOrder[p++] = TR::RealRegister::ecx;
-      _properties._allocationOrder[p++] = TR::RealRegister::edx;
-      _properties._allocationOrder[p++] = TR::RealRegister::esi;
-      _properties._allocationOrder[p++] = TR::RealRegister::eax;
+      properties->_allocationOrder[p++] = TR::RealRegister::ecx;
+      properties->_allocationOrder[p++] = TR::RealRegister::edx;
+      properties->_allocationOrder[p++] = TR::RealRegister::esi;
+      properties->_allocationOrder[p++] = TR::RealRegister::eax;
      }
 
    // Preserved regs
-   _properties._allocationOrder[p++] = TR::RealRegister::ebx;
-   _properties._allocationOrder[p++] = TR::RealRegister::r9;
-   _properties._allocationOrder[p++] = TR::RealRegister::r10;
-   _properties._allocationOrder[p++] = TR::RealRegister::r11;
-   _properties._allocationOrder[p++] = TR::RealRegister::r12;
-   _properties._allocationOrder[p++] = TR::RealRegister::r13;
-   _properties._allocationOrder[p++] = TR::RealRegister::r14;
-   _properties._allocationOrder[p++] = TR::RealRegister::r15;
+   properties->_allocationOrder[p++] = TR::RealRegister::ebx;
+   properties->_allocationOrder[p++] = TR::RealRegister::r9;
+   properties->_allocationOrder[p++] = TR::RealRegister::r10;
+   properties->_allocationOrder[p++] = TR::RealRegister::r11;
+   properties->_allocationOrder[p++] = TR::RealRegister::r12;
+   properties->_allocationOrder[p++] = TR::RealRegister::r13;
+   properties->_allocationOrder[p++] = TR::RealRegister::r14;
+   properties->_allocationOrder[p++] = TR::RealRegister::r15;
 
    TR_ASSERT(p == machine()->getNumGlobalGPRs(), "assertion failure");
@@ -232,34 +238,33 @@ J9::X86::AMD64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg)
       // Linkage regs in reverse order
       if (TR::Machine::numRegsWithheld(cg) == 0)
          {
-         _properties._allocationOrder[p++] = TR::RealRegister::xmm7;
-         _properties._allocationOrder[p++] = TR::RealRegister::xmm6;
+         properties->_allocationOrder[p++] = TR::RealRegister::xmm7;
+         properties->_allocationOrder[p++] = TR::RealRegister::xmm6;
          }
      else
         {
        TR_ASSERT(TR::Machine::numRegsWithheld(cg) == 2, "numRegsWithheld: only 0 and 2 currently supported");
         }
-      _properties._allocationOrder[p++] = TR::RealRegister::xmm5;
-      _properties._allocationOrder[p++] = TR::RealRegister::xmm4;
-      _properties._allocationOrder[p++] = TR::RealRegister::xmm3;
-      _properties._allocationOrder[p++] = TR::RealRegister::xmm2;
-      _properties._allocationOrder[p++] = TR::RealRegister::xmm1;
-      _properties._allocationOrder[p++] = TR::RealRegister::xmm0;
+      properties->_allocationOrder[p++] = TR::RealRegister::xmm5;
+      properties->_allocationOrder[p++] = TR::RealRegister::xmm4;
+      properties->_allocationOrder[p++] = TR::RealRegister::xmm3;
+      properties->_allocationOrder[p++] = TR::RealRegister::xmm2;
+      properties->_allocationOrder[p++] = TR::RealRegister::xmm1;
+      properties->_allocationOrder[p++] = TR::RealRegister::xmm0;
      }
 
    // Preserved regs
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm8;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm9;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm10;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm11;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm12;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm13;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm14;
-   _properties._allocationOrder[p++] = TR::RealRegister::xmm15;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm8;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm9;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm10;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm11;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm12;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm13;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm14;
+   properties->_allocationOrder[p++] = TR::RealRegister::xmm15;
 
    TR_ASSERT(p == (machine()->getNumGlobalGPRs() + machine()->_numGlobalFPRs), "assertion failure");
    }
-
 ////////////////////////////////////////////////
 //
 // Argument manipulation
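Editorial note on the refactoring above: the register-convention setup moves out of the `PrivateLinkage` constructor into the free function `J9::X86::AMD64::setProperties()` (declared in the header change that follows), so another client can fill in a `TR::X86LinkageProperties` without constructing a TR private linkage. A minimal sketch of such a caller is shown below; it assumes `TR::X86LinkageProperties` is default-constructible, `MJITLinkagePropertiesHolder` is an illustrative name, and the include path is abbreviated. This is not part of the patch.

```cpp
// Hypothetical consumer of the extracted helper; adjust the include path to
// the build's include directories.
#include "codegen/AMD64PrivateLinkage.hpp"

class MJITLinkagePropertiesHolder
   {
   public:
   explicit MJITLinkagePropertiesHolder(TR::CodeGenerator *cg)
      {
      // Reuse the single source of truth for the AMD64 private-linkage
      // register conventions instead of duplicating the tables.
      J9::X86::AMD64::setProperties(cg, &_properties);
      }

   const TR::X86LinkageProperties &properties() const { return _properties; }

   private:
   TR::X86LinkageProperties _properties;
   };
```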
diff --git a/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.hpp b/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.hpp
index 10bdc3fe637..f0137068489 100644
--- a/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.hpp
+++ b/runtime/compiler/x/amd64/codegen/AMD64PrivateLinkage.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2020 IBM Corp. and others
+ * Copyright (c) 2000, 2022 IBM Corp. and others
  *
  * This program and the accompanying materials are made available under
  * the terms of the Eclipse Public License 2.0 which accompanies this
@@ -38,6 +38,7 @@ namespace X86
 namespace AMD64
 {
 
+void setProperties(TR::CodeGenerator *cg,TR::X86LinkageProperties *properties);
 class PrivateLinkage : public J9::X86::PrivateLinkage
    {
diff --git a/runtime/include/j9cfg.h.in b/runtime/include/j9cfg.h.in
index f2755a06b39..251eaa0e615 100644
--- a/runtime/include/j9cfg.h.in
+++ b/runtime/include/j9cfg.h.in
@@ -225,6 +225,7 @@ extern "C" {
 #cmakedefine J9VM_OPT_MEMORY_CHECK_SUPPORT
 #cmakedefine J9VM_OPT_METHOD_HANDLE
 #cmakedefine J9VM_OPT_METHOD_HANDLE_COMMON
+#cmakedefine J9VM_OPT_MICROJIT
 #cmakedefine J9VM_OPT_MODULE
 #cmakedefine J9VM_OPT_MULTI_VM
 #cmakedefine J9VM_OPT_NATIVE_CHARACTER_CONVERTER
diff --git a/runtime/makelib/uma.properties b/runtime/makelib/uma.properties
index e013dcc74c9..93186af50d6 100644
--- a/runtime/makelib/uma.properties
+++ b/runtime/makelib/uma.properties
@@ -1,4 +1,4 @@
-# Copyright (c) 2009, 2020 IBM Corp. and others
+# Copyright (c) 2009, 2022 IBM Corp. and others
 #
 # This program and the accompanying materials are made available under
 # the terms of the Eclipse Public License 2.0 which accompanies this
@@ -33,4 +33,4 @@ JCL_HS60=jclhs60_
 makefile_phase_prefix=.NOTPARALLEL
 
 # Define phases
-phase_list=omr core jit mjit quick sjit util j2se jvmti_tests rom offload
+phase_list=omr core jit quick sjit util j2se jvmti_tests rom offload
diff --git a/runtime/oti/j9consts.h b/runtime/oti/j9consts.h
index d58b8a56d20..d5f5ae850ed 100644
--- a/runtime/oti/j9consts.h
+++ b/runtime/oti/j9consts.h
@@ -700,11 +700,13 @@ extern "C" {
 #define J9_BCLOOP_EXIT_INTERPRETER 0x16
 #define J9_BCLOOP_FILL_OSR_BUFFER 0x17
 
+/* Extended Method Flags */
 #define J9_RAS_METHOD_UNSEEN 0x0
 #define J9_RAS_METHOD_SEEN 0x1
 #define J9_RAS_METHOD_TRACING 0x2
 #define J9_RAS_METHOD_TRACE_ARGS 0x4
 #define J9_RAS_METHOD_TRIGGERING 0x8
+#define J9_MJIT_FAILED_COMPILE 0x10
 #define J9_RAS_MASK 0xF
 
 #define J9_JNI_OFFLOAD_SWITCH_CREATE_JAVA_VM 0x1
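Editorial note on the j9consts.h change: `J9_MJIT_FAILED_COMPILE` (0x10) is placed just above the RAS method-flag values but outside `J9_RAS_MASK` (0xF), so recording a failed MicroJIT compile cannot disturb the method-trace bits. A minimal standalone sketch of that bit arithmetic follows; the `extendedFlags` variable is illustrative and not a real J9Method field, and the macros are repeated locally rather than pulled from j9consts.h.

```cpp
#include <cassert>
#include <cstdint>

#define J9_RAS_MASK 0xF
#define J9_MJIT_FAILED_COMPILE 0x10

int main()
   {
   uint8_t extendedFlags = 0x5;                           // some RAS trace bits already set
   extendedFlags |= J9_MJIT_FAILED_COMPILE;               // remember that MicroJIT gave up on this method
   assert((extendedFlags & J9_RAS_MASK) == 0x5);          // RAS bits are untouched
   assert((extendedFlags & J9_MJIT_FAILED_COMPILE) != 0); // and the failure is recorded
   return 0;
   }
```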