diff --git a/sljit_src/sljitConfigInternal.h b/sljit_src/sljitConfigInternal.h index 1769f42a..c800fbe9 100644 --- a/sljit_src/sljitConfigInternal.h +++ b/sljit_src/sljitConfigInternal.h @@ -786,6 +786,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_ATOMIC_WIDTH 32 #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) diff --git a/sljit_src/sljitLir.h b/sljit_src/sljitLir.h index c7a1c194..3955b604 100644 --- a/sljit_src/sljitLir.h +++ b/sljit_src/sljitLir.h @@ -1804,7 +1804,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler - the memory operation (op) and the base address (stored in mem_reg) passed to the load/store operations must be the same (the mem_reg can be a different register, only its value must be the same) - - an store must always follow a load for the same transaction. + - a store must always follow a load for the same transaction, but + loads might be abandoned + - if the CPU defines a minimum supported bit width (SLJIT_ATOMIC_WIDTH) + then the memory address must be aligned to it op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all signed loads such as SLJIT_MOV32_S16 diff --git a/sljit_src/sljitNativeLOONGARCH_64.c b/sljit_src/sljitNativeLOONGARCH_64.c index eea8a328..7902de42 100644 --- a/sljit_src/sljitNativeLOONGARCH_64.c +++ b/sljit_src/sljitNativeLOONGARCH_64.c @@ -62,7 +62,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { /* LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them): -| Format name | Composition | +| Format name | Composition | | 2R | Opcode + Rj + Rd | | 3R | Opcode + Rk + Rj + Rd | | 4R | Opcode + Ra + Rk + Rj + Rd | @@ -2449,8 +2449,10 @@ static sljit_ins get_jump_instruction(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: + case
SLJIT_ATOMIC_NOT_STORED: return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO); case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_STORED: return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO); case SLJIT_LESS: case SLJIT_GREATER: @@ -2734,6 +2736,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); src_r = dst_r; break; + case SLJIT_ATOMIC_STORED: + case SLJIT_ATOMIC_NOT_STORED: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); + src_r = dst_r; + invert ^= 0x1; + break; case SLJIT_OVERFLOW: case SLJIT_NOT_OVERFLOW: if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { @@ -2933,15 +2941,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler sljit_s32 dst_reg, sljit_s32 mem_reg) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst_reg); - SLJIT_UNUSED_ARG(mem_reg); + sljit_ins ins = LL_W; + sljit_s32 dst = dst_reg; CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); - return SLJIT_ERR_UNSUPPORTED; + op = GET_OPCODE(op); + switch (op) { + case SLJIT_MOV_P: + case SLJIT_MOV: + ins = LL_D; + break; +#ifdef SLJIT_ATOMIC_EMULATION + case SLJIT_MOV_U16: + dst = TMP_REG1; + break; +#endif /* SLJIT_ATOMIC_EMULATION */ + } + + FAIL_IF(push_inst(compiler, ins | RD(dst) | RJ(mem_reg))); + +#ifdef SLJIT_ATOMIC_EMULATION + switch (op) { + case SLJIT_MOV_U8: + return push_inst(compiler, ANDI | RD(dst_reg) | RJ(dst_reg) | IMM_I12(0xff)); +#if SLJIT_ATOMIC_WIDTH > 16 + case SLJIT_MOV_U16: + return push_inst(compiler, BSTRPICK_W | RD(dst_reg) | RJ(dst) | (15 << 16)); +#endif /* SLJIT_ATOMIC_WIDTH > 16 */ + } +#endif /* SLJIT_ATOMIC_EMULATION */ + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, @@ -2950,16 +2981,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct
sljit_compiler sljit_s32 mem_reg, sljit_s32 temp_reg) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(src_reg); - SLJIT_UNUSED_ARG(mem_reg); - SLJIT_UNUSED_ARG(temp_reg); + sljit_ins ins = SC_W; + sljit_ins chk = ORI | RD(EQUAL_FLAG) | RJ(temp_reg) | RK(TMP_ZERO); CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); - return SLJIT_ERR_UNSUPPORTED; + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + FAIL_IF(push_inst(compiler, ANDI | RD(temp_reg) | RJ(src_reg) | IMM_I12(0xff))); + break; + case SLJIT_MOV_U16: + FAIL_IF(push_inst(compiler, BSTRINS_W | RD(temp_reg) | RJ(src_reg) | (15 << 16))); + break; +#ifdef SLJIT_ATOMIC_EMULATION + case SLJIT_MOV32: + case SLJIT_MOV_U32: + FAIL_IF(push_inst(compiler, BSTRINS_D | RD(temp_reg) | RJ(src_reg) | (31 << 16))); + break; +#endif /* SLJIT_ATOMIC_EMULATION */ + case SLJIT_MOV_P: + case SLJIT_MOV: + ins = SC_D; + /* FALLTHRU */ + default: + FAIL_IF(push_inst(compiler, ORI | RD(temp_reg) | RJ(src_reg) | RK(TMP_ZERO))); + break; + } + + FAIL_IF(push_inst(compiler, ins | RD(temp_reg) | RJ(mem_reg))); + + return chk ? push_inst(compiler, chk) : SLJIT_SUCCESS; } static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index e90ba675..47b6e7b1 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -11537,14 +11537,22 @@ static void test92(void) { #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ || (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ - || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) /* Test atomic load and store. 
*/ executable_code code; struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL); struct sljit_label *label; struct sljit_jump *jump; - sljit_sw buf[38]; + sljit_sw buf[40]; sljit_s32 i; +#ifndef SLJIT_ATOMIC_WIDTH +#define PADBYTE 0x55 + sljit_sw padding = WCONST(0x5555555555555555, 0x55555555); +#else +#define PADBYTE 0x00 + sljit_sw padding = WCONST(0x5555555500000000, 0); +#endif /* !SLJIT_ATOMIC_WIDTH */ if (verbose) printf("Run test92\n"); @@ -11552,7 +11560,7 @@ static void test92(void) FAILED(!compiler, "cannot create compiler\n"); for (i = 0; i < 36; i++) - buf[i] = WCONST(0x5555555555555555, 0x55555555); + buf[i] = padding; buf[0] = 4678; *(sljit_u8*)(buf + 2) = 78; @@ -11569,6 +11577,7 @@ static void test92(void) ((sljit_s32*)(buf + 33))[1] = -1; #endif /* SLJIT_64BIT_ARCHITECTURE */ buf[37] = WCONST(0x1122334444332211, 0x11222211); + buf[38] = SLJIT_FUNC_ADDR(test92); sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 2 * sizeof(sljit_sw)); @@ -11674,6 +11683,19 @@ static void test92(void) /* buf[19] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0); + /* case: SLJIT_MOV_P */ + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 38 * sizeof(sljit_sw)); + label = sljit_emit_label(compiler); + sljit_emit_atomic_load(compiler, SLJIT_MOV_P, SLJIT_R0, SLJIT_R2); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_S1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R1, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_R2, 0); + /* buf[38] */ + sljit_emit_atomic_store(compiler, SLJIT_MOV_P | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R2, SLJIT_R1); + sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); + /* buf[39] */ + sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM1(SLJIT_S0), 39 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 20 * sizeof(sljit_sw)); 
label = sljit_emit_label(compiler); sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R3, SLJIT_R1); @@ -11704,6 +11726,8 @@ static void test92(void) /* buf[25] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0); +#ifndef SLJIT_ATOMIC_WIDTH + /* case: byte aligned lower offset */ sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 26 * sizeof(sljit_sw) + 1); label = sljit_emit_label(compiler); sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); @@ -11717,6 +11741,7 @@ static void test92(void) /* buf[27] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 27 * sizeof(sljit_sw), SLJIT_S2, 0); + /* case: byte aligned higher offset */ sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 28 * sizeof(sljit_sw) + 2); label = sljit_emit_label(compiler); sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1); @@ -11730,7 +11755,10 @@ static void test92(void) sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_ATOMIC_NOT_STORED); /* buf[30] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 30 * sizeof(sljit_sw), SLJIT_R0, 0); +#endif /* !SLJIT_ATOMIC_WIDTH */ +#if (!defined SLJIT_ATOMIC_WIDTH || SLJIT_ATOMIC_WIDTH <= 16) + /* case: half aligned lower offset */ sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 31 * sizeof(sljit_sw) + 2); label = sljit_emit_label(compiler); sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_R0, SLJIT_R1); @@ -11741,6 +11769,7 @@ static void test92(void) sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); /* buf[32] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 32 * sizeof(sljit_sw), SLJIT_S1, 0); +#endif /* SLJIT_ATOMIC_WIDTH <= 16 */ #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 33 * sizeof(sljit_sw) + sizeof(sljit_u32)); @@ -11753,12
+11782,13 @@ static void test92(void) sljit_emit_atomic_store(compiler, SLJIT_MOV32 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0); sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label); /* buf[34] */ - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 34 * sizeof(sljit_sw), SLJIT_S1, 0); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S0), 34 * sizeof(sljit_sw), SLJIT_S1, 0); #endif /* SLJIT_64BIT_ARCHITECTURE */ /* buf[35] */ sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 35 * sizeof(sljit_sw), SLJIT_ATOMIC_STORED); +#ifndef SLJIT_ATOMIC_WIDTH /* case50: abandoned atomic load is safe */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 37 * sizeof(sljit_sw)); @@ -11770,6 +11800,7 @@ static void test92(void) sljit_emit_atomic_store(compiler, SLJIT_MOV_U8, SLJIT_R2, SLJIT_R1, SLJIT_R0); /* buf[36] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 36 * sizeof(sljit_sw), SLJIT_R2, 0); +#endif /* !SLJIT_ATOMIC_WIDTH */ sljit_emit_return_void(compiler); @@ -11782,20 +11813,20 @@ static void test92(void) FAILED(buf[0] != -9856, "test92 case 1 failed\n"); FAILED(buf[1] != 4678, "test92 case 2 failed\n"); FAILED(*(sljit_u8*)(buf + 2) != 203, "test92 case 3 failed\n"); - FAILED(((sljit_u8*)(buf + 2))[1] != 0x55, "test92 case 4 failed\n"); + FAILED(((sljit_u8*)(buf + 2))[1] != PADBYTE, "test92 case 4 failed\n"); FAILED(buf[3] != 78, "test92 case 5 failed\n"); FAILED(buf[4] != 203, "test92 case 6 failed\n"); FAILED(*(sljit_u8*)(buf + 5) != 97, "test92 case 7 failed\n"); - FAILED(((sljit_u8*)(buf + 5))[1] != 0x55, "test92 case 8 failed\n"); + FAILED(((sljit_u8*)(buf + 5))[1] != PADBYTE, "test92 case 8 failed\n"); FAILED(*(sljit_u32*)(buf + 6) != 211, "test92 case 9 failed\n"); FAILED(buf[7] != (sljit_sw)(buf + 5), "test92 case 10 failed\n"); FAILED(buf[8] != 97, "test92 case 11 failed\n"); FAILED(*(sljit_u16*)(buf + 
9) != (sljit_u16)(sljit_sw)(buf + 9), "test92 case 12 failed\n"); - FAILED(((sljit_u8*)(buf + 9))[2] != 0x55, "test92 case 13 failed\n"); + FAILED(((sljit_u8*)(buf + 9))[2] != PADBYTE, "test92 case 13 failed\n"); FAILED(buf[10] != 17897, "test92 case 14 failed\n"); FAILED(buf[11] != (sljit_sw)(buf + 9), "test92 case 15 failed\n"); FAILED(*(sljit_u16*)(buf + 12) != 41306, "test92 case 16 failed\n"); - FAILED(((sljit_u8*)(buf + 12))[2] != 0x55, "test92 case 17 failed\n"); + FAILED(((sljit_u8*)(buf + 12))[2] != PADBYTE, "test92 case 17 failed\n"); FAILED(*(sljit_u32*)(buf + 13) != 57812, "test92 case 18 failed\n"); FAILED(buf[14] != 41306, "test92 case 19 failed\n"); FAILED(*(sljit_u32*)(buf + 15) != 987654321, "test92 case 20 failed\n"); @@ -11813,34 +11844,45 @@ static void test92(void) #endif /* SLJIT_64BIT_ARCHITECTURE */ FAILED(buf[19] != -573621, "test92 case 26 failed\n"); FAILED(*(sljit_u8*)(buf + 20) != 240, "test92 case 27 failed\n"); - FAILED(((sljit_u8*)(buf + 20))[1] != 0x55, "test92 case 28 failed\n"); + FAILED(((sljit_u8*)(buf + 20))[1] != PADBYTE, "test92 case 28 failed\n"); FAILED(buf[21] != 192, "test92 case 29 failed\n"); FAILED(buf[22] != -5893, "test92 case 30 failed\n"); FAILED(buf[23] != 4059, "test92 case 31 failed\n"); FAILED(buf[24] != 6359, "test92 case 32 failed\n"); FAILED(buf[25] != (sljit_sw)(buf + 23), "test92 case 33 failed\n"); - FAILED(((sljit_u8*)(buf + 26))[0] != 0x55, "test92 case 34 failed\n"); +#ifndef SLJIT_ATOMIC_WIDTH + FAILED(((sljit_u8*)(buf + 26))[0] != PADBYTE, "test92 case 34 failed\n"); FAILED(((sljit_u8*)(buf + 26))[1] != 204, "test92 case 35 failed\n"); - FAILED(((sljit_u8*)(buf + 26))[2] != 0x55, "test92 case 36 failed\n"); + FAILED(((sljit_u8*)(buf + 26))[2] != PADBYTE, "test92 case 36 failed\n"); FAILED(buf[27] != 105, "test92 case 37 failed\n"); - FAILED(((sljit_u8*)(buf + 28))[1] != 0x55, "test92 case 38 failed\n"); + FAILED(((sljit_u8*)(buf + 28))[1] != PADBYTE, "test92 case 38 failed\n"); 
FAILED(((sljit_u8*)(buf + 28))[2] != 240, "test92 case 39 failed\n"); - FAILED(((sljit_u8*)(buf + 28))[3] != 0x55, "test92 case 40 failed\n"); + FAILED(((sljit_u8*)(buf + 28))[3] != PADBYTE, "test92 case 40 failed\n"); FAILED(buf[29] != 13, "test92 case 41 failed\n"); FAILED(buf[30] != 0, "test92 case 42 failed\n"); +#endif /* !SLJIT_ATOMIC_WIDTH */ +#if (!defined SLJIT_ATOMIC_WIDTH || SLJIT_ATOMIC_WIDTH <= 16) FAILED(((sljit_u16*)(buf + 31))[0] != 0x5555, "test92 case 43 failed\n"); FAILED(((sljit_u16*)(buf + 31))[1] != 51403, "test92 case 44 failed\n"); FAILED(buf[32] != 14876, "test92 case 45 failed\n"); +#endif /* SLJIT_ATOMIC_WIDTH <= 16 */ #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if PADBYTE != 0 FAILED(((sljit_u32*)(buf + 33))[0] != 0x55555555, "test92 case 46 failed\n"); +#endif FAILED(((sljit_u32*)(buf + 33))[1] != 0xdeadbeef, "test92 case 47 failed\n"); - FAILED(buf[34] != 0xffffffff, "test92 case 48 failed\n"); + FAILED(((sljit_u32*)(buf + 34))[0] != 0xffffffff, "test92 case 48 failed\n"); #endif /* SLJIT_64BIT_ARCHITECTURE */ FAILED(buf[35] != 1, "test92 case 49 failed\n"); +#ifndef SLJIT_ATOMIC_WIDTH FAILED(buf[36] != 0x11, "test92 case 50 (load) failed\n"); FAILED(((sljit_u8*)(buf + 37))[1] != buf[36], "test92 case 50 (store) failed\n"); +#endif /* !SLJIT_ATOMIC_WIDTH */ + FAILED(buf[38] != (sljit_sw)&buf[38], "test92 case 51 (store) failed \n"); + FAILED(buf[39] != SLJIT_FUNC_ADDR(test92), "test92 case 51 (load) failed \n"); sljit_free_code(code.code, NULL); +#undef PADBYTE #endif successful_tests++; }