diff --git a/internal/asm/amd64/consts.go b/internal/asm/amd64/consts.go index ec71cb86b1a..b7083e4eb44 100644 --- a/internal/asm/amd64/consts.go +++ b/internal/asm/amd64/consts.go @@ -323,16 +323,16 @@ const ( PADDB // PADDW is the PADDW instruction. https://www.felixcloutier.com/x86/paddb:paddw:paddd:paddq PADDW - // PADDL is the PADDD instruction. https://www.felixcloutier.com/x86/paddb:paddw:paddd:paddq - PADDL + // PADDD is the PADDD instruction. https://www.felixcloutier.com/x86/paddb:paddw:paddd:paddq + PADDD // PADDQ is the PADDQ instruction. https://www.felixcloutier.com/x86/paddb:paddw:paddd:paddq PADDQ // PSUBB is the PSUBB instruction. https://www.felixcloutier.com/x86/psubb:psubw:psubd PSUBB // PSUBW is the PSUBW instruction. https://www.felixcloutier.com/x86/psubb:psubw:psubd PSUBW - // PSUBL is the PSUBD instruction. https://www.felixcloutier.com/x86/psubb:psubw:psubd - PSUBL + // PSUBD is the PSUBD instruction. https://www.felixcloutier.com/x86/psubb:psubw:psubd + PSUBD // PSUBQ is the PSUBQ instruction. https://www.felixcloutier.com/x86/psubq PSUBQ // ADDPS is the ADDPS instruction. https://www.felixcloutier.com/x86/addps @@ -449,6 +449,112 @@ const ( PMAXUW // PMAXUB is the PMAXUB instruction https://www.felixcloutier.com/x86/pmaxub:pmaxuw PMAXUB + // PMULLW is the PMULLW instruction https://www.felixcloutier.com/x86/pmullw + PMULLW + // PMULLD is the PMULLD instruction https://www.felixcloutier.com/x86/pmulld:pmullq + PMULLD + // PMULUDQ is the PMULUDQ instruction https://www.felixcloutier.com/x86/pmuludq + PMULUDQ + // PSUBSB is the PSUBSB instruction https://www.felixcloutier.com/x86/psubsb:psubsw + PSUBSB + // PSUBSW is the PSUBSW instruction https://www.felixcloutier.com/x86/psubsb:psubsw + PSUBSW + // PSUBUSB is the PSUBUSB instruction https://www.felixcloutier.com/x86/psubusb:psubusw + PSUBUSB + // PSUBUSW is the PSUBUSW instruction https://www.felixcloutier.com/x86/psubusb:psubusw + PSUBUSW + // PADDSW is the PADDSW instruction https://www.felixcloutier.com/x86/paddsb:paddsw + PADDSW + // PADDSB is the PADDSB instruction https://www.felixcloutier.com/x86/paddsb:paddsw + PADDSB + // PADDUSW is the PADDUSW instruction https://www.felixcloutier.com/x86/paddusb:paddusw + PADDUSW + // PAVGB is the PAVGB instruction https://www.felixcloutier.com/x86/pavgb:pavgw + PAVGB + // PAVGW is the PAVGW instruction https://www.felixcloutier.com/x86/pavgb:pavgw + PAVGW + // PABSB is the PABSB instruction https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq + PABSB + // PABSW is the PABSW instruction https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq + PABSW + // PABSD is the PABSD instruction https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq + PABSD + // BLENDVPD is the BLENDVPD instruction https://www.felixcloutier.com/x86/blendvpd + BLENDVPD + // MAXPD is the MAXPD instruction https://www.felixcloutier.com/x86/maxpd + MAXPD + // MAXPS is the MAXPS instruction https://www.felixcloutier.com/x86/maxps + MAXPS + // MINPD is the MINPD instruction https://www.felixcloutier.com/x86/minpd + MINPD + // MINPS is the MINPS instruction https://www.felixcloutier.com/x86/minps + MINPS + // ANDNPD is the ANDNPD instruction https://www.felixcloutier.com/x86/andnpd + ANDNPD + // ANDNPS is the ANDNPS instruction https://www.felixcloutier.com/x86/andnps + ANDNPS + // MULPS is the MULPS instruction https://www.felixcloutier.com/x86/mulps + MULPS + // MULPD is the MULPD instruction https://www.felixcloutier.com/x86/mulpd + MULPD + // DIVPS is the DIVPS instruction 
https://www.felixcloutier.com/x86/divps + DIVPS + // DIVPD is the DIVPD instruction https://www.felixcloutier.com/x86/divpd + DIVPD + // SQRTPS is the SQRTPS instruction https://www.felixcloutier.com/x86/sqrtps + SQRTPS + // SQRTPD is the SQRTPD instruction https://www.felixcloutier.com/x86/sqrtpd + SQRTPD + // ROUNDPS is the ROUNDPS instruction https://www.felixcloutier.com/x86/roundps + ROUNDPS + // ROUNDPD is the ROUNDPD instruction https://www.felixcloutier.com/x86/roundpd + ROUNDPD + // PALIGNR is the PALIGNR instruction https://www.felixcloutier.com/x86/palignr + PALIGNR + // PUNPCKLWD is the PUNPCKLWD instruction https://www.felixcloutier.com/x86/punpcklbw:punpcklwd:punpckldq:punpcklqdq + PUNPCKLWD + // PUNPCKHWD is the PUNPCKHWD instruction https://www.felixcloutier.com/x86/punpckhbw:punpckhwd:punpckhdq:punpckhqdq + PUNPCKHWD + // PMULHUW is the PMULHUW instruction https://www.felixcloutier.com/x86/pmulhuw + PMULHUW + // PMULDQ is the PMULDQ instruction https://www.felixcloutier.com/x86/pmuldq + PMULDQ + // PMULHRSW is the PMULHRSW instruction https://www.felixcloutier.com/x86/pmulhrsw + PMULHRSW + // PMULHW is the PMULHW instruction https://www.felixcloutier.com/x86/pmulhw + PMULHW + // CMPEQPS is the CMPEQPS instruction https://www.felixcloutier.com/x86/cmpps + CMPEQPS + // CMPEQPD is the CMPEQPD instruction https://www.felixcloutier.com/x86/cmppd + CMPEQPD + // CVTTPS2DQ is the CVTTPS2DQ instruction https://www.felixcloutier.com/x86/cvttps2dq + CVTTPS2DQ + // CVTDQ2PS is the CVTDQ2PS instruction https://www.felixcloutier.com/x86/cvtdq2ps + CVTDQ2PS + // MOVUPD is the MOVUPD instruction https://www.felixcloutier.com/x86/movupd + MOVUPD + // SHUFPS is the SHUFPS instruction https://www.felixcloutier.com/x86/shufps + SHUFPS + // PMADDWD is the PMADDWD instruction https://www.felixcloutier.com/x86/pmaddwd + PMADDWD + // CVTDQ2PD is the CVTDQ2PD instruction https://www.felixcloutier.com/x86/cvtdq2pd + CVTDQ2PD + // UNPCKLPS is the UNPCKLPS instruction https://www.felixcloutier.com/x86/unpcklps + UNPCKLPS + // PACKUSWB is the PACKUSWB instruction https://www.felixcloutier.com/x86/packuswb + PACKUSWB + // PACKSSDW is the PACKSSDW instruction https://www.felixcloutier.com/x86/packsswb:packssdw + PACKSSDW + // PACKUSDW is the PACKUSDW instruction https://www.felixcloutier.com/x86/packusdw + PACKUSDW + // CVTPS2PD is the CVTPS2PD instruction https://www.felixcloutier.com/x86/cvtps2pd + CVTPS2PD + // CVTPD2PS is the CVTPD2PS instruction https://www.felixcloutier.com/x86/cvtpd2ps + CVTPD2PS + // PMADDUBSW is the PMADDUBSW instruction https://www.felixcloutier.com/x86/pmaddubsw + PMADDUBSW + // CVTTPD2DQ is the CVTTPD2DQ instruction https://www.felixcloutier.com/x86/cvttpd2dq + CVTTPD2DQ // instructionEnd is always placed at the bottom of this iota definition to be used in the test. 
 	instructionEnd
@@ -731,8 +837,8 @@ func InstructionName(instruction asm.Instruction) string {
 		return "PADDB"
 	case PADDW:
 		return "PADDW"
-	case PADDL:
-		return "PADDL"
+	case PADDD:
+		return "PADDD"
 	case PADDQ:
 		return "PADDQ"
 	case ADDPS:
@@ -743,7 +849,7 @@ func InstructionName(instruction asm.Instruction) string {
 		return "PSUBB"
 	case PSUBW:
 		return "PSUBW"
-	case PSUBL:
-		return "PSUBL"
+	case PSUBD:
+		return "PSUBD"
 	case PSUBQ:
 		return "PSUBQ"
@@ -863,6 +969,112 @@ func InstructionName(instruction asm.Instruction) string {
 		return "PMAXSW"
 	case PMAXSB:
 		return "PMAXSB"
+	case PMULLW:
+		return "PMULLW"
+	case PMULLD:
+		return "PMULLD"
+	case PMULUDQ:
+		return "PMULUDQ"
+	case PSUBSB:
+		return "PSUBSB"
+	case PSUBUSB:
+		return "PSUBUSB"
+	case PADDSW:
+		return "PADDSW"
+	case PADDSB:
+		return "PADDSB"
+	case PADDUSW:
+		return "PADDUSW"
+	case PSUBSW:
+		return "PSUBSW"
+	case PSUBUSW:
+		return "PSUBUSW"
+	case PAVGB:
+		return "PAVGB"
+	case PAVGW:
+		return "PAVGW"
+	case PABSB:
+		return "PABSB"
+	case PABSW:
+		return "PABSW"
+	case PABSD:
+		return "PABSD"
+	case BLENDVPD:
+		return "BLENDVPD"
+	case MAXPD:
+		return "MAXPD"
+	case MAXPS:
+		return "MAXPS"
+	case MINPD:
+		return "MINPD"
+	case MINPS:
+		return "MINPS"
+	case ANDNPD:
+		return "ANDNPD"
+	case ANDNPS:
+		return "ANDNPS"
+	case MULPS:
+		return "MULPS"
+	case MULPD:
+		return "MULPD"
+	case DIVPS:
+		return "DIVPS"
+	case DIVPD:
+		return "DIVPD"
+	case SQRTPS:
+		return "SQRTPS"
+	case SQRTPD:
+		return "SQRTPD"
+	case ROUNDPS:
+		return "ROUNDPS"
+	case ROUNDPD:
+		return "ROUNDPD"
+	case PALIGNR:
+		return "PALIGNR"
+	case PUNPCKLWD:
+		return "PUNPCKLWD"
+	case PUNPCKHWD:
+		return "PUNPCKHWD"
+	case PMULHUW:
+		return "PMULHUW"
+	case PMULDQ:
+		return "PMULDQ"
+	case PMULHRSW:
+		return "PMULHRSW"
+	case PMULHW:
+		return "PMULHW"
+	case CMPEQPS:
+		return "CMPEQPS"
+	case CMPEQPD:
+		return "CMPEQPD"
+	case CVTTPS2DQ:
+		return "CVTTPS2DQ"
+	case CVTDQ2PS:
+		return "CVTDQ2PS"
+	case MOVUPD:
+		return "MOVUPD"
+	case SHUFPS:
+		return "SHUFPS"
+	case PMADDWD:
+		return "PMADDWD"
+	case CVTDQ2PD:
+		return "CVTDQ2PD"
+	case UNPCKLPS:
+		return "UNPCKLPS"
+	case PACKUSWB:
+		return "PACKUSWB"
+	case PACKSSDW:
+		return "PACKSSDW"
+	case PACKUSDW:
+		return "PACKUSDW"
+	case CVTPS2PD:
+		return "CVTPS2PD"
+	case CVTPD2PS:
+		return "CVTPD2PS"
+	case PMADDUBSW:
+		return "PMADDUBSW"
+	case CVTTPD2DQ:
+		return "CVTTPD2DQ"
 	}
 	panic(fmt.Errorf("unknown instruction %d", instruction))
 }
diff --git a/internal/asm/amd64/impl.go b/internal/asm/amd64/impl.go
index c9b213aec59..50f849a8d58 100644
--- a/internal/asm/amd64/impl.go
+++ b/internal/asm/amd64/impl.go
@@ -1214,12 +1214,12 @@ var registerToRegisterOpcode = map[asm.Instruction]struct {
 	// https://www.felixcloutier.com/x86/paddb:paddw:paddd:paddq
 	PADDB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfc}, requireSrcFloat: true, requireDstFloat: true},
 	PADDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfd}, requireSrcFloat: true, requireDstFloat: true},
-	PADDL: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfe}, requireSrcFloat: true, requireDstFloat: true},
+	PADDD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfe}, requireSrcFloat: true, requireDstFloat: true},
 	PADDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd4}, requireSrcFloat: true, requireDstFloat: true},
 	// https://www.felixcloutier.com/x86/psubb:psubw:psubd
 	PSUBB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf8}, requireSrcFloat: true, requireDstFloat: true},
 	PSUBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf9}, requireSrcFloat: true, requireDstFloat: true},
-
PSUBL: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfa}, requireSrcFloat: true, requireDstFloat: true}, + PSUBD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfa}, requireSrcFloat: true, requireDstFloat: true}, // https://www.felixcloutier.com/x86/psubq PSUBQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfb}, requireSrcFloat: true, requireDstFloat: true}, // https://www.felixcloutier.com/x86/addps @@ -1330,6 +1330,122 @@ var registerToRegisterOpcode = map[asm.Instruction]struct { PMAXUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3e}, requireSrcFloat: true, requireDstFloat: true}, // https://www.felixcloutier.com/x86/pmaxub:pmaxuw PMAXUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xde}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmullw + PMULLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd5}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmulld:pmullq + PMULLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x40}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmuludq + PMULUDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf4}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/psubsb:psubsw + PSUBSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe8}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/psubsb:psubsw + PSUBSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe9}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/psubusb:psubusw + PSUBUSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd8}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/psubusb:psubusw + PSUBUSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd9}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/paddsb:paddsw + PADDSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xed}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/paddsb:paddsw + PADDSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xec}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/paddusb:paddusw + PADDUSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdd}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pavgb:pavgw + PAVGB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe0}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pavgb:pavgw + PAVGW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe3}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq + PABSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1c}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq + PABSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1d}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq + PABSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1e}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/blendvpd + BLENDVPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x15}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/maxpd + MAXPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5f}, requireSrcFloat: true, requireDstFloat: true}, + 
// https://www.felixcloutier.com/x86/maxps + MAXPS: {opcode: []byte{0x0f, 0x5f}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/minpd + MINPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5d}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/minps + MINPS: {opcode: []byte{0x0f, 0x5d}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/andnpd + ANDNPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x55}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/andnps + ANDNPS: {opcode: []byte{0x0f, 0x55}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/mulps + MULPS: {opcode: []byte{0x0f, 0x59}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/mulpd + MULPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x59}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/divps + DIVPS: {opcode: []byte{0x0f, 0x5e}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/divpd + DIVPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5e}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/sqrtps + SQRTPS: {opcode: []byte{0x0f, 0x51}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/sqrtpd + SQRTPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x51}, requireSrcFloat: true, requireDstFloat: true}, + ROUNDPS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x08}, requireSrcFloat: true, requireDstFloat: true, needArg: true}, + ROUNDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x09}, requireSrcFloat: true, requireDstFloat: true, needArg: true}, + // https://www.felixcloutier.com/x86/palignr + PALIGNR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0f}, requireSrcFloat: true, requireDstFloat: true, needArg: true}, + // https://www.felixcloutier.com/x86/punpcklbw:punpcklwd:punpckldq:punpcklqdq + PUNPCKLWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x61}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/punpckhbw:punpckhwd:punpckhdq:punpckhqdq + PUNPCKHWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x69}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmulhuw + PMULHUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe4}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmuldq + PMULDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x28}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmulhrsw + PMULHRSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x0b}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmovsx + PMOVSXBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x20}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmovsx + PMOVSXWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x23}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmovsx + PMOVSXDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x25}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmovzx + PMOVZXBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x30}, requireSrcFloat: true, requireDstFloat: true}, + // 
https://www.felixcloutier.com/x86/pmovzx + PMOVZXWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x33}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmovzx + PMOVZXDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x35}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmulhw + PMULHW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe5}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/cmpps + CMPEQPS: {opcode: []byte{0x0f, 0xc2}, requireSrcFloat: true, requireDstFloat: true, needArg: true}, + // https://www.felixcloutier.com/x86/cmppd + CMPEQPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc2}, requireSrcFloat: true, requireDstFloat: true, needArg: true}, + // https://www.felixcloutier.com/x86/cvttps2dq + CVTTPS2DQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5b}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/cvtdq2ps + CVTDQ2PS: {opcode: []byte{0x0f, 0x5b}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/cvtdq2pd + CVTDQ2PD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xe6}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/cvtpd2ps + CVTPD2PS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5a}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/cvtps2pd + CVTPS2PD: {opcode: []byte{0x0f, 0x5a}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/movupd + MOVUPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x10}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/shufps + SHUFPS: {opcode: []byte{0x0f, 0xc6}, requireSrcFloat: true, requireDstFloat: true, needArg: true}, + // https://www.felixcloutier.com/x86/pmaddwd + PMADDWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf5}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/unpcklps + UNPCKLPS: {opcode: []byte{0x0f, 0x14}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/packuswb + PACKUSWB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x67}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/packsswb:packssdw + PACKSSDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x6b}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/packusdw + PACKUSDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x2b}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/pmaddubsw + PMADDUBSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x04}, requireSrcFloat: true, requireDstFloat: true}, + // https://www.felixcloutier.com/x86/cvttpd2dq + CVTTPD2DQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe6}, requireDstFloat: true, requireSrcFloat: true}, } var RegisterToRegisterShiftOpcode = map[asm.Instruction]struct { @@ -1931,7 +2047,7 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { isFloatReg := IsVectorRegister(n.DstReg) switch n.Instruction { - case PSLLD, PSLLQ, PSRLD, PSRLQ, PSRAW, PSRLW, PSLLW: + case PSLLD, PSLLQ, PSRLD, PSRLQ, PSRAW, PSRLW, PSLLW, PSRAD: if !isFloatReg { return fmt.Errorf("%s needs float register but got %s", InstructionName(n.Instruction), RegisterName(n.DstReg)) } @@ -2093,7 +2209,7 @@ func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) { a.Buf.Write([]byte{0x66, 
0x0f, 0x73, modRM})
 		a.WriteConst(n.SrcConst, 8)
 	}
-	case PSRAW:
+	case PSRAW, PSRAD:
 		// https://www.felixcloutier.com/x86/psraw:psrad:psraq
 		modRM := 0b11_000_000 | // Specifying that operand is register.
-			0b00_100_000 | // PSRAW with immediate needs "/4" extension.
+			0b00_100_000 | // Both PSRAW and PSRAD with immediate need the "/4" extension.
 			regBits
 		if rexPrefix != RexPrefixNone {
 			a.Buf.WriteByte(rexPrefix)
 		}
-		a.Buf.Write([]byte{0x0f, 0x71, modRM})
+
+		var op byte
+		if n.Instruction == PSRAD {
+			op = 0x72
+		} else {
+			op = 0x71
+		}
+
+		a.Buf.Write([]byte{0x0f, op, modRM})
 		a.WriteConst(n.SrcConst, 8)
 	case PSRLW:
 		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
diff --git a/internal/asm/amd64/impl_staticconst.go b/internal/asm/amd64/impl_staticconst.go
index 376ae625eb1..63c3684fc75 100644
--- a/internal/asm/amd64/impl_staticconst.go
+++ b/internal/asm/amd64/impl_staticconst.go
@@ -110,6 +110,10 @@ func (a *AssemblerImpl) encodeStaticConstToRegister(n *NodeImpl) (err error) {
 		// https://www.felixcloutier.com/x86/lea
 		rexPrefix |= RexPrefixW
 		opcodes = []byte{0x8d}
+	case MOVUPD:
+		// https://www.felixcloutier.com/x86/movupd
+		mandatoryPrefix = 0x66
+		opcodes = []byte{0x0f, 0x10}
 	default:
 		err = errorEncodingUnsupported(n)
 		return
diff --git a/internal/asm/amd64/impl_staticconst_test.go b/internal/asm/amd64/impl_staticconst_test.go
index 89c96b6cfb2..602e37524af 100644
--- a/internal/asm/amd64/impl_staticconst_test.go
+++ b/internal/asm/amd64/impl_staticconst_test.go
@@ -1,6 +1,7 @@
 package amd64
 
 import (
+	"encoding/hex"
 	"testing"
 
 	"github.com/tetratelabs/wazero/internal/asm"
@@ -138,7 +139,7 @@ func TestAssemblerImpl_encodeStaticConstToRegister(t *testing.T) {
 	a.CompileStandAlone(UD2) // insert any dummy instruction before MOVDQUs.
 	err := a.CompileLoadStaticConstToRegister(MOVDQU, consts[0], RegX12)
 	require.NoError(t, err)
-	err = a.CompileLoadStaticConstToRegister(MOVDQU, consts[1], RegX0)
+	err = a.CompileLoadStaticConstToRegister(MOVUPD, consts[1], RegX0)
 	require.NoError(t, err)
 	err = a.CompileLoadStaticConstToRegister(LEAQ, consts[0], RegX0)
 	require.NoError(t, err)
@@ -153,9 +154,9 @@ func TestAssemblerImpl_encodeStaticConstToRegister(t *testing.T) {
 		// 0x2: movdqu xmm12, xmmword ptr [rip + 0x18]
 		// where rip = 0x0b, therefore [rip + 0x18] = [0x23] = consts[0].
 		0xf3, 0x44, 0x0f, 0x6f, 0x25, 0x18, 0x00, 0x00, 0x00,
-		// 0x0b: movdqu xmm0, xmmword ptr [rip + 0x18]
+		// 0x0b: movupd xmm0, xmmword ptr [rip + 0x18]
 		// where rip = 0x13, therefore [rip + 0x18] = [0x2b] = consts[1].
-		0xf3, 0x0f, 0x6f, 0x05, 0x18, 0x00, 0x00, 0x00,
+		0x66, 0x0f, 0x10, 0x05, 0x18, 0x00, 0x00, 0x00,
 		// 0x13: lea rax, [rip + 9]
 		// where rip = 0x1a, therefore [rip + 0x9] = [0x23] = consts[0].
0x48, 0x8d, 0x05, 0x09, 0x00, 0x00, 0x00, @@ -168,6 +169,5 @@ func TestAssemblerImpl_encodeStaticConstToRegister(t *testing.T) { 0x22, 0x22, 0x22, 0x22, // 0x2f: consts[2] 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, - }, actual) - + }, actual, hex.EncodeToString(actual)) } diff --git a/internal/asm/amd64/impl_test.go b/internal/asm/amd64/impl_test.go index 0a49b7ab838..1571701a1ec 100644 --- a/internal/asm/amd64/impl_test.go +++ b/internal/asm/amd64/impl_test.go @@ -487,7 +487,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "MOVDQU", n: &NodeImpl{ Instruction: MOVDQU, - Types: OperandTypesRegisterToRegister, SrcReg: RegX3, DstReg: RegX10, }, @@ -497,7 +496,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "MOVDQU", n: &NodeImpl{ Instruction: MOVDQU, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegX3, }, @@ -507,7 +505,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "MOVDQU", n: &NodeImpl{ Instruction: MOVDQU, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegX15, }, @@ -517,7 +514,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "MOVDQA", n: &NodeImpl{ Instruction: MOVDQA, - Types: OperandTypesRegisterToRegister, SrcReg: RegX3, DstReg: RegX10, }, @@ -527,7 +523,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "MOVDQA", n: &NodeImpl{ Instruction: MOVDQA, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegX3, }, @@ -537,7 +532,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "MOVDQA", n: &NodeImpl{ Instruction: MOVDQA, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegX15, }, @@ -547,7 +541,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "PACKSSWB", n: &NodeImpl{ Instruction: PACKSSWB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegX15, }, @@ -557,7 +550,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmovmskb r15d, xmm10", n: &NodeImpl{ Instruction: PMOVMSKB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegR15, }, @@ -567,7 +559,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "movmskps eax, xmm10", n: &NodeImpl{ Instruction: MOVMSKPS, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegAX, }, @@ -577,7 +568,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "movmskps r13d, xmm1", n: &NodeImpl{ Instruction: MOVMSKPS, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegR13, }, @@ -587,7 +577,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "movmskpd eax, xmm10", n: &NodeImpl{ Instruction: MOVMSKPD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX10, DstReg: RegAX, }, @@ -597,7 +586,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "movmskpd r15d, xmm1", n: &NodeImpl{ Instruction: MOVMSKPD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegR15, }, @@ -607,7 +595,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pand xmm15, xmm1", n: &NodeImpl{ Instruction: PAND, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegX15, }, @@ -617,7 +604,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "por xmm1, xmm15", n: &NodeImpl{ Instruction: POR, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX1, }, @@ -627,7 +613,6 @@ func 
TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pandn xmm13, xmm15", n: &NodeImpl{ Instruction: PANDN, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX13, }, @@ -637,7 +622,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psrad xmm13, xmm15", n: &NodeImpl{ Instruction: PSRAD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX13, }, @@ -647,7 +631,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psraw xmm1, xmm1", n: &NodeImpl{ Instruction: PSRAW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegX1, }, @@ -657,7 +640,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psrlq xmm14, xmm14", n: &NodeImpl{ Instruction: PSRLQ, - Types: OperandTypesRegisterToRegister, SrcReg: RegX14, DstReg: RegX14, }, @@ -667,7 +649,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psrld xmm3, xmm3", n: &NodeImpl{ Instruction: PSRLD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX3, DstReg: RegX3, }, @@ -677,7 +658,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psrlw xmm15, xmm1", n: &NodeImpl{ Instruction: PSRLW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegX15, }, @@ -687,7 +667,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psllw xmm1, xmm15", n: &NodeImpl{ Instruction: PSLLW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX1, }, @@ -697,7 +676,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "punpcklbw xmm1, xmm15", n: &NodeImpl{ Instruction: PUNPCKLBW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX1, }, @@ -707,7 +685,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "punpckhbw xmm11, xmm1", n: &NodeImpl{ Instruction: PUNPCKHBW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegX11, }, @@ -717,7 +694,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pslld xmm11, xmm1", n: &NodeImpl{ Instruction: PSLLD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX1, DstReg: RegX11, }, @@ -727,7 +703,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "psllq xmm11, xmm15", n: &NodeImpl{ Instruction: PSLLQ, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX11, }, @@ -737,7 +712,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "cmpeqps xmm11, xmm15", n: &NodeImpl{ Instruction: CMPPS, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX11, Arg: 0, // CMPPS with arg=0 == Pseudo-Op CMPEQPS. @@ -748,7 +722,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "cmpordps xmm1, xmm5", n: &NodeImpl{ Instruction: CMPPS, - Types: OperandTypesRegisterToRegister, SrcReg: RegX5, DstReg: RegX1, Arg: 7, // CMPPS with arg=7 == Pseudo-Op CMPORDPS. @@ -759,7 +732,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "cmplepd xmm11, xmm15", n: &NodeImpl{ Instruction: CMPPD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX15, DstReg: RegX11, Arg: 2, // CMPPD with arg=2 == Pseudo-Op CMPLEPD. @@ -770,7 +742,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "cmpneqpd xmm1, xmm5", n: &NodeImpl{ Instruction: CMPPD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX5, DstReg: RegX1, Arg: 4, // CMPPD with arg=4 == Pseudo-Op CMPNEQPD. 
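
A note on the displacement arithmetic in the `encodeStaticConstToRegister` expectations above: x86-64 RIP-relative operands are encoded relative to the address of the *next* instruction, which is why each comment adds the instruction length before the disp32. A minimal, self-contained sketch of that arithmetic — the offsets are taken from the test comments, and nothing here is wazero API:

```go
package main

import "fmt"

func main() {
	// movupd xmm0, xmmword ptr [rip + 0x18] sits at offset 0x0b and is
	// 8 bytes long (66 0f 10 05 + disp32), so RIP = 0x0b + 8 = 0x13, and
	// the operand address is 0x13 + 0x18 = 0x2b, where consts[1] lives.
	instrOffset, instrLen, disp32 := 0x0b, 8, 0x18
	rip := instrOffset + instrLen
	fmt.Printf("rip=%#x, operand=%#x\n", rip, rip+disp32) // rip=0x13, operand=0x2b
}
```
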
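The `Arg` comments in the CMPPS/CMPPD cases above ("arg=0 == Pseudo-Op CMPEQPS", "arg=7 == CMPORDPS", and so on) follow the standard SSE compare-predicate encoding: 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD. A small illustrative helper — not part of the assembler — that reconstructs the pseudo-op mnemonics the test names use:

```go
// ssePredicateName maps the imm8 predicate of CMPPS/CMPPD to its pseudo-op
// mnemonic. The predicate table is the standard SSE/SSE2 encoding; the
// helper itself exists only to illustrate the Arg values in the tests.
func ssePredicateName(base string, imm8 byte) string {
	predicates := [...]string{"EQ", "LT", "LE", "UNORD", "NEQ", "NLT", "NLE", "ORD"}
	if int(imm8) >= len(predicates) {
		return base // imm8 > 7 has no pseudo-op mnemonic in baseline SSE/SSE2
	}
	// e.g. ssePredicateName("CMPPS", 0) == "CMPEQPS",
	//      ssePredicateName("CMPPD", 4) == "CMPNEQPD".
	return "CMP" + predicates[imm8] + base[len(base)-2:]
}
```
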
@@ -781,7 +752,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pcmpgtq xmm10, xmm3", n: &NodeImpl{ Instruction: PCMPGTQ, - Types: OperandTypesRegisterToRegister, SrcReg: RegX3, DstReg: RegX10, }, @@ -791,7 +761,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pcmpgtd xmm10, xmm3", n: &NodeImpl{ Instruction: PCMPGTD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX3, DstReg: RegX10, }, @@ -801,7 +770,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pminsd xmm10, xmm3", n: &NodeImpl{ Instruction: PMINSD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX3, DstReg: RegX10, }, @@ -811,7 +779,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmaxsd xmm1, xmm12", n: &NodeImpl{ Instruction: PMAXSD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX12, DstReg: RegX1, }, @@ -821,7 +788,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmaxsw xmm1, xmm12", n: &NodeImpl{ Instruction: PMAXSW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX12, DstReg: RegX1, }, @@ -831,7 +797,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pminsw xmm1, xmm12", n: &NodeImpl{ Instruction: PMINSW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX12, DstReg: RegX1, }, @@ -841,7 +806,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pcmpgtb xmm1, xmm12", n: &NodeImpl{ Instruction: PCMPGTB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX12, DstReg: RegX1, }, @@ -851,7 +815,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pminsb xmm1, xmm12", n: &NodeImpl{ Instruction: PMINSB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX12, DstReg: RegX1, }, @@ -861,7 +824,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmaxsb xmm1, xmm2", n: &NodeImpl{ Instruction: PMAXSB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -871,7 +833,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pminud xmm1, xmm2", n: &NodeImpl{ Instruction: PMINUD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -881,7 +842,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pminuw xmm1, xmm2", n: &NodeImpl{ Instruction: PMINUW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -891,7 +851,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pminub xmm1, xmm2", n: &NodeImpl{ Instruction: PMINUB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -901,7 +860,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmaxud xmm1, xmm2", n: &NodeImpl{ Instruction: PMAXUD, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -911,7 +869,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmaxuw xmm1, xmm2", n: &NodeImpl{ Instruction: PMAXUW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -921,7 +878,6 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pmaxub xmm1, xmm2", n: &NodeImpl{ Instruction: PMAXUB, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, @@ -931,12 +887,495 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { name: "pcmpgtw xmm1, xmm2", n: &NodeImpl{ Instruction: PCMPGTW, - Types: OperandTypesRegisterToRegister, SrcReg: RegX2, DstReg: RegX1, }, exp: 
[]byte{0x66, 0xf, 0x65, 0xca}, }, + + { + name: "pmullw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PMULLW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xd5, 0xe9}, + }, + { + name: "pmulld xmm1, xmm11", + n: &NodeImpl{ + Instruction: PMULLD, + SrcReg: RegX11, + DstReg: RegX1, + }, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x40, 0xcb}, + }, + { + name: "pmuludq xmm13, xmm1", + n: &NodeImpl{ + Instruction: PMULUDQ, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xf4, 0xe9}, + }, + { + name: "psubsb xmm13, xmm1", + n: &NodeImpl{ + Instruction: PSUBSB, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xe8, 0xe9}, + }, + { + name: "psubsw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PSUBSW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xe9, 0xe9}, + }, + { + name: "psubusb xmm13, xmm1", + n: &NodeImpl{ + Instruction: PSUBUSB, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xd8, 0xe9}, + }, + { + name: "psubusw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PSUBUSW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xd9, 0xe9}, + }, + { + name: "paddsw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PADDSW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xed, 0xe9}, + }, + { + name: "paddsb xmm13, xmm1", + n: &NodeImpl{ + Instruction: PADDSB, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xec, 0xe9}, + }, + { + name: "paddusw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PADDUSW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xdd, 0xe9}, + }, + { + name: "pavgb xmm13, xmm1", + n: &NodeImpl{ + Instruction: PAVGB, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xe0, 0xe9}, + }, + { + name: "pavgw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PAVGW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xe3, 0xe9}, + }, + { + name: "pabsb xmm13, xmm1", + n: &NodeImpl{ + Instruction: PABSB, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x38, 0x1c, 0xe9}, + }, + { + name: "pabsw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PABSW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x38, 0x1d, 0xe9}, + }, + { + name: "pabsd xmm13, xmm1", + n: &NodeImpl{ + Instruction: PABSD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x38, 0x1e, 0xe9}, + }, + { + name: "blendvpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: BLENDVPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x38, 0x15, 0xe9}, + }, + { + name: "maxpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: MAXPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x5f, 0xe9}, + }, + { + name: "maxps xmm13, xmm1", + n: &NodeImpl{ + Instruction: MAXPS, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x44, 0xf, 0x5f, 0xe9}, + }, + { + name: "minpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: MINPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x5d, 0xe9}, + }, + { + name: "minps xmm13, xmm1", + n: &NodeImpl{ + Instruction: MINPS, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x44, 0xf, 0x5d, 0xe9}, + }, + { + name: "andnpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: ANDNPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x55, 0xe9}, + }, + { + name: "andnps xmm13, xmm1", + n: &NodeImpl{ + Instruction: ANDNPS, + SrcReg: RegX1, + DstReg: 
RegX13, + }, + exp: []byte{0x44, 0xf, 0x55, 0xe9}, + }, + { + name: "mulps xmm13, xmm1", + n: &NodeImpl{ + Instruction: MULPS, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x44, 0xf, 0x59, 0xe9}, + }, + { + name: "mulpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: MULPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x59, 0xe9}, + }, + { + name: "divps xmm13, xmm1", + n: &NodeImpl{ + Instruction: DIVPS, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x44, 0xf, 0x5e, 0xe9}, + }, + { + name: "divpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: DIVPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x5e, 0xe9}, + }, + { + name: "sqrtps xmm13, xmm1", + n: &NodeImpl{ + Instruction: SQRTPS, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x44, 0xf, 0x51, 0xe9}, + }, + { + name: "sqrtpd xmm13, xmm1", + n: &NodeImpl{ + Instruction: SQRTPD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x51, 0xe9}, + }, + { + name: "roundps xmm13, xmm1, 0", + n: &NodeImpl{ + Instruction: ROUNDPS, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 0, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0x8, 0xe9, 0x0}, + }, + { + name: "roundps xmm13, xmm1, 1", + n: &NodeImpl{ + Instruction: ROUNDPS, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 1, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0x8, 0xe9, 0x1}, + }, + { + name: "roundps xmm13, xmm1, 3", + n: &NodeImpl{ + Instruction: ROUNDPS, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 3, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0x8, 0xe9, 0x3}, + }, + { + name: "roundpd xmm13, xmm1, 0", + n: &NodeImpl{ + Instruction: ROUNDPD, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 0, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0x9, 0xe9, 0x0}, + }, + { + name: "roundpd xmm13, xmm1, 1", + n: &NodeImpl{ + Instruction: ROUNDPD, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 1, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0x9, 0xe9, 0x1}, + }, + { + name: "roundpd xmm13, xmm1, 3", + n: &NodeImpl{ + Instruction: ROUNDPD, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 3, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0x9, 0xe9, 0x3}, + }, + { + name: "palignr xmm13, xmm1, 3", + n: &NodeImpl{ + Instruction: PALIGNR, + SrcReg: RegX1, + DstReg: RegX13, + Arg: 3, + }, + exp: []byte{0x66, 0x44, 0xf, 0x3a, 0xf, 0xe9, 0x3}, + }, + { + name: "punpcklwd xmm13, xmm1", + n: &NodeImpl{ + Instruction: PUNPCKLWD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x61, 0xe9}, + }, + { + name: "punpckhwd xmm13, xmm1", + n: &NodeImpl{ + Instruction: PUNPCKHWD, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x69, 0xe9}, + }, + { + name: "pmulhuw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PMULHUW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0xe4, 0xe9}, + }, + { + name: "pmuldq xmm13, xmm1", + n: &NodeImpl{ + Instruction: PMULDQ, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x38, 0x28, 0xe9}, + }, + { + name: "pmulhrsw xmm13, xmm1", + n: &NodeImpl{ + Instruction: PMULHRSW, + SrcReg: RegX1, + DstReg: RegX13, + }, + exp: []byte{0x66, 0x44, 0xf, 0x38, 0xb, 0xe9}, + }, + + { + name: "pmovsxbw xmm5, xmm10", + n: &NodeImpl{Instruction: PMOVSXBW, SrcReg: RegX10, DstReg: RegX5}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x20, 0xea}, + }, + { + name: "pmovsxwd xmm5, xmm10", + n: &NodeImpl{Instruction: PMOVSXWD, SrcReg: RegX10, DstReg: RegX5}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x23, 0xea}, + }, + { + name: "pmovsxdq xmm5, xmm10", + n: &NodeImpl{Instruction: PMOVSXDQ, 
SrcReg: RegX10, DstReg: RegX5}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x25, 0xea}, + }, + { + name: "pmovzxbw xmm5, xmm10", + n: &NodeImpl{Instruction: PMOVZXBW, SrcReg: RegX10, DstReg: RegX5}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x30, 0xea}, + }, + { + name: "pmovzxwd xmm5, xmm10", + n: &NodeImpl{Instruction: PMOVZXWD, SrcReg: RegX10, DstReg: RegX5}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x33, 0xea}, + }, + { + name: "pmovzxdq xmm5, xmm10", + n: &NodeImpl{Instruction: PMOVZXDQ, SrcReg: RegX10, DstReg: RegX5}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x35, 0xea}, + }, + { + name: "pmulhw xmm2, xmm1", + n: &NodeImpl{Instruction: PMULHW, SrcReg: RegX1, DstReg: RegX2}, + exp: []byte{0x66, 0xf, 0xe5, 0xd1}, + }, + { + name: "cmpltps xmm1, xmm14", + n: &NodeImpl{Instruction: CMPEQPS, SrcReg: RegX14, DstReg: RegX1, Arg: 1}, + exp: []byte{0x41, 0xf, 0xc2, 0xce, 0x1}, + }, + { + name: "cmpunordpd xmm1, xmm14", + n: &NodeImpl{Instruction: CMPEQPD, SrcReg: RegX14, DstReg: RegX1, Arg: 3}, + exp: []byte{0x66, 0x41, 0xf, 0xc2, 0xce, 0x3}, + }, + { + name: "cvttps2dq xmm1, xmm14", + n: &NodeImpl{Instruction: CVTTPS2DQ, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0xf3, 0x41, 0xf, 0x5b, 0xce}, + }, + { + name: "cvtdq2ps xmm1, xmm14", + n: &NodeImpl{Instruction: CVTDQ2PS, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x41, 0xf, 0x5b, 0xce}, + }, + { + name: "movupd xmm1, xmm14", + n: &NodeImpl{Instruction: MOVUPD, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0x10, 0xce}, + }, + { + name: "shufps xmm1, xmm14, 5", + n: &NodeImpl{Instruction: SHUFPS, SrcReg: RegX14, DstReg: RegX1, Arg: 5}, + exp: []byte{0x41, 0xf, 0xc6, 0xce, 0x5}, + }, + { + name: "pmaddwd xmm1, xmm14", + n: &NodeImpl{Instruction: PMADDWD, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0xf5, 0xce}, + }, + { + name: "cvtdq2pd xmm1, xmm14", + n: &NodeImpl{Instruction: CVTDQ2PD, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0xf3, 0x41, 0xf, 0xe6, 0xce}, + }, + { + name: "unpcklps xmm1, xmm14", + n: &NodeImpl{Instruction: UNPCKLPS, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x41, 0xf, 0x14, 0xce}, + }, + { + name: "packuswb xmm1, xmm14", + n: &NodeImpl{Instruction: PACKUSWB, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0x67, 0xce}, + }, + { + name: "packssdw xmm1, xmm14", + n: &NodeImpl{Instruction: PACKSSDW, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0x6b, 0xce}, + }, + { + name: "packusdw xmm1, xmm14", + n: &NodeImpl{Instruction: PACKUSDW, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x2b, 0xce}, + }, + { + name: "cvtps2pd xmm1, xmm14", + n: &NodeImpl{Instruction: CVTPS2PD, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x41, 0xf, 0x5a, 0xce}, + }, + { + name: "cvtpd2ps xmm1, xmm14", + n: &NodeImpl{Instruction: CVTPD2PS, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0x5a, 0xce}, + }, + { + name: "pmaddubsw xmm1, xmm14", + n: &NodeImpl{Instruction: PMADDUBSW, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0x38, 0x4, 0xce}, + }, + { + name: "cvttpd2dq xmm1, xmm14", + n: &NodeImpl{Instruction: CVTTPD2DQ, SrcReg: RegX14, DstReg: RegX1}, + exp: []byte{0x66, 0x41, 0xf, 0xe6, 0xce}, + }, } for _, tt := range tests { @@ -1021,6 +1460,16 @@ func TestAssemblerImpl_EncodeConstToRegister(t *testing.T) { }, exp: []byte{0x66, 0x41, 0xf, 0x71, 0xf2, 0x8}, }, + { + name: "psrad xmm10, 0x1f", + n: &NodeImpl{ + Instruction: PSRAD, + Types: OperandTypesRegisterToRegister, + SrcConst: 0x1f, + DstReg: RegX10, + }, + exp: []byte{0x66, 
0x41, 0xf, 0x72, 0xe2, 0x1f},
+		},
 	}
 
 	for _, tt := range tests {
diff --git a/internal/asm/assembler.go b/internal/asm/assembler.go
index 100037f04cc..421954f8b98 100644
--- a/internal/asm/assembler.go
+++ b/internal/asm/assembler.go
@@ -15,7 +15,7 @@ type Register byte
 const NilRegister Register = 0
 
 // Instruction represents architecture-specific instructions.
-type Instruction byte
+type Instruction uint16
 
 // ConditionalRegisterState represents architecture-specific conditional
 // register's states.
diff --git a/internal/engine/compiler/compiler.go b/internal/engine/compiler/compiler.go
index 8e57a6eab23..3fd689e2687 100644
--- a/internal/engine/compiler/compiler.go
+++ b/internal/engine/compiler/compiler.go
@@ -16,474 +16,286 @@ type compiler interface {
 	// stackPointerCeil is the max stack pointer that the target function would reach.
 	// staticData is codeStaticData for the resulting native code.
 	compile() (code []byte, staticData codeStaticData, stackPointerCeil uint64, err error)
-	// compileHostFunction emits the trampoline code from which native code can jump into the host function.
+	// compileHostFunction adds the trampoline code from which native code can jump into the host function.
 	// TODO: maybe we wouldn't need to have trampoline for host functions.
 	compileHostFunction() error
 	// compileLabel notify compilers of the beginning of a label.
 	// Return true if the compiler decided to skip the entire label.
 	// See wazeroir.OperationLabel
 	compileLabel(o *wazeroir.OperationLabel) (skipThisLabel bool)
-	// compileUnreachable adds instructions to return to engine with nativeCallStatusCodeUnreachable status.
-	// See wasm.OpcodeUnreachable
+	// compileUnreachable adds instructions to perform wazeroir.OperationUnreachable.
 	compileUnreachable() error
-	// compileSwap adds instruction to swap the stack top value with the target in the Wasm value stack.
-	// The values are might be on registers or memory-stack at runtime, so compiler implementations
-	// emit instructions to swap values depending these locations.
-	// See wazeroir.OperationBrIf
+	// compileSwap adds instructions to perform wazeroir.OperationSwap.
 	compileSwap(o *wazeroir.OperationSwap) error
-	// compileGlobalGet adds instructions to read the value of the given index in the ModuleInstance.Globals
-	// and push the value onto the stack.
-	// See wasm.OpcodeGlobalGet
+	// compileGlobalGet adds instructions to perform wazeroir.OperationGlobalGet.
 	compileGlobalGet(o *wazeroir.OperationGlobalGet) error
-	// compileGlobalSet adds instructions to set the top value on the stack to the given index in the ModuleInstance.Globals.
-	// See wasm.OpcodeGlobalSet
+	// compileGlobalSet adds instructions to perform wazeroir.OperationGlobalSet.
 	compileGlobalSet(o *wazeroir.OperationGlobalSet) error
-	// compileBr adds instructions to branch into the given label.
-	// See wasm.OpcodeBr
+	// compileBr adds instructions to perform wazeroir.OperationBr.
 	compileBr(o *wazeroir.OperationBr) error
-	// compileBrIf adds instructions to pops a value and branch into ".then" label if the value equals 1.
-	// Otherwise, the code branches into ".else" label.
-	// See wasm.OpcodeBrIf and wazeroir.OperationBrIf
+	// compileBrIf adds instructions to perform wazeroir.OperationBrIf.
 	compileBrIf(o *wazeroir.OperationBrIf) error
-	// compileBrTable adds instructions to do br_table operation.
- // A br_table operation has list of targets and default target, and - // this pops a value from the stack (called "index") and decide which branch we go into next - // based on the value. - // - // For example, assume we have operations like {default: L_DEFAULT, targets: [L0, L1, L2]}. - // If "index" >= len(defaults), then branch into the L_DEFAULT label. - // Otherwise, we enter label of targets[index]. - // See wasm.OpcodeBrTable + // compileBrTable adds instructions to perform wazeroir.OperationBrTable. compileBrTable(o *wazeroir.OperationBrTable) error - // compileCall adds instructions to call into a function of the given index. - // See wasm.OpcodeCall + // compileCall adds instructions to perform wazeroir.OperationCall. compileCall(o *wazeroir.OperationCall) error - // compileCallIndirect adds instructions to perform call_indirect operation. - // This consumes the one value from the top of stack (called "offset"), - // and make a function call against the function whose function address equals "table[offset]". - // - // Note: This is called indirect function call in the sense that the target function is indirectly - // determined by the current state (top value) of the stack. - // Therefore, two checks are performed at runtime before entering the target function: - // 1) If "offset" exceeds the length of table, the function exits with nativeCallStatusCodeInvalidTableAccess. - // 2) If the type of the function table[offset] doesn't match the specified function type, the function exits with nativeCallStatusCodeTypeMismatchOnIndirectCall. - // Otherwise, we successfully enter the target function. - // - // See wasm.CallIndirect + // compileCallIndirect adds instructions to perform wazeroir.OperationCallIndirect. compileCallIndirect(o *wazeroir.OperationCallIndirect) error - // compileDrop adds instructions to drop values within the given inclusive range from the value stack. - // See wazeroir.OperationDrop + // compileDrop adds instructions to perform wazeroir.OperationDrop. compileDrop(o *wazeroir.OperationDrop) error - // compileSelect uses top three values on the stack. For example, if we have stack as [..., x1, x2, c] - // and the value "c" equals zero, then the stack results in [..., x1], otherwise, [..., x2]. - // See wasm.OpcodeSelect + // compileSelect adds instructions to perform wazeroir.OperationSelect. compileSelect() error - // compilePick adds instructions to copy a value on the given location in the Wasm value stack, - // and push the copied value onto the top of the stack. - // See wazeroir.OperationPick + // compilePick adds instructions to perform wazeroir.OperationPick. compilePick(o *wazeroir.OperationPick) error - // compileAdd adds instructions to pop two values from the stack, add these two values, and push - // back the result onto the stack. - // See wasm.OpcodeI32Add wasm.OpcodeI64Add wasm.OpcodeF32Add wasm.OpcodeF64Add + // compileAdd adds instructions to perform wazeroir.OperationAdd. compileAdd(o *wazeroir.OperationAdd) error - // compileSub adds instructions to pop two values from the stack, subtract the top from the second one, and push - // back the result onto the stack. - // See wasm.OpcodeI32Sub wasm.OpcodeI64Sub wasm.OpcodeF32Sub wasm.OpcodeF64Sub + // compileSub adds instructions to perform wazeroir.OperationSub. compileSub(o *wazeroir.OperationSub) error - // compileMul adds instructions to pop two values from the stack, multiply these two values, and push - // back the result onto the stack. 
- // See wasm.OpcodeI32Mul wasm.OpcodeI64Mul wasm.OpcodeF32Mul wasm.OpcodeF64Mul + // compileMul adds instructions to perform wazeroir.OperationMul. compileMul(o *wazeroir.OperationMul) error - // compileClz emits instructions to count up the leading zeros in the - // current top of the stack, and push the count result. - // For example, stack of [..., 0x00_ff_ff_ff] results in [..., 8]. - // See wasm.OpcodeI32Clz wasm.OpcodeI64Clz + // compileClz adds instructions to perform wazeroir.OperationClz. compileClz(o *wazeroir.OperationClz) error - // compileCtz emits instructions to count up the trailing zeros in the - // current top of the stack, and push the count result. - // For example, stack of [..., 0xff_ff_ff_00] results in [..., 8]. - // See wasm.OpcodeI32Ctz wasm.OpcodeI64Ctz + // compileCtz adds instructions to perform wazeroir.OperationCtz. compileCtz(o *wazeroir.OperationCtz) error - // compilePopcnt emits instructions to count up the number of set bits in the - // current top of the stack, and push the count result. - // For example, stack of [..., 0b00_00_00_11] results in [..., 2]. - // See wasm.OpcodeI32Popcnt wasm.OpcodeI64Popcnt + // compilePopcnt adds instructions to perform wazeroir.OperationPopcnt. compilePopcnt(o *wazeroir.OperationPopcnt) error - // compileDiv emits the instructions to perform division on the top two values on the stack. - // See wasm.OpcodeI32DivS wasm.OpcodeI32DivU wasm.OpcodeI64DivS wasm.OpcodeI64DivU wasm.OpcodeF32Div wasm.OpcodeF64Div + // compileDiv adds instructions to perform wazeroir.OperationDiv. compileDiv(o *wazeroir.OperationDiv) error - // compileRem emits the instructions to perform division on the top - // two values of integer type on the stack and puts the remainder of the result - // onto the stack. For example, stack [..., 10, 3] results in [..., 1] where - // the quotient is discarded. - // See wasm.OpcodeI32RemS wasm.OpcodeI32RemU wasm.OpcodeI64RemS wasm.OpcodeI64RemU + // compileRem adds instructions to perform wazeroir.OperationRem. compileRem(o *wazeroir.OperationRem) error - // compileAnd emits instructions to perform logical "and" operation on - // top two values on the stack, and push the result. - // See wasm.OpcodeI32And wasm.OpcodeI64And + // compileAnd adds instructions to perform wazeroir.OperationAnd. compileAnd(o *wazeroir.OperationAnd) error - // compileOr emits instructions to perform logical "or" operation on - // top two values on the stack, and pushes the result. - // See wasm.OpcodeI32Or wasm.OpcodeI64Or + // compileOr adds instructions to perform wazeroir.OperationOr. compileOr(o *wazeroir.OperationOr) error - // compileXor emits instructions to perform logical "xor" operation on - // top two values on the stack, and pushes the result. - // See wasm.OpcodeI32Xor wasm.OpcodeI64Xor + // compileXor adds instructions to perform wazeroir.OperationXor. compileXor(o *wazeroir.OperationXor) error - // compileShl emits instructions to perform a shift-left operation on - // top two values on the stack, and pushes the result. - // See wasm.OpcodeI32Shl wasm.OpcodeI64Shl + // compileShl adds instructions to perform wazeroir.OperationShl. compileShl(o *wazeroir.OperationShl) error - // compileShr emits instructions to perform a shift-right operation on - // top two values on the stack, and pushes the result. - // See wasm.OpcodeI32Shr wasm.OpcodeI64Shr + // compileShr adds instructions to perform wazeroir.OperationShr. 
compileShr(o *wazeroir.OperationShr) error - // compileRotl emits instructions to perform a rotate-left operation on - // top two values on the stack, and pushes the result. - // See wasm.OpcodeI32Rotl wasm.OpcodeI64Rotl + // compileRotl adds instructions to perform wazeroir.OperationRotl. compileRotl(o *wazeroir.OperationRotl) error - // compileRotr emits instructions to perform a rotate-right operation on - // top two values on the stack, and pushes the result. - // See wasm.OpcodeI32Rotr wasm.OpcodeI64Rotr + // compileRotr adds instructions to perform wazeroir.OperationRotr. compileRotr(o *wazeroir.OperationRotr) error - // compileAbs adds instructions to replace the top value of float type on the stack with its absolute value. - // For example, stack [..., -1.123] results in [..., 1.123]. - // See wasm.OpcodeF32Abs wasm.OpcodeF64Abs + // compileNeg adds instructions to perform wazeroir.OperationAbs. compileAbs(o *wazeroir.OperationAbs) error - // compileNeg adds instructions to replace the top value of float type on the stack with its negated value. - // For example, stack [..., -1.123] results in [..., 1.123]. - // See wasm.OpcodeF32Neg wasm.OpcodeF64Neg + // compileNeg adds instructions to perform wazeroir.OperationNeg. compileNeg(o *wazeroir.OperationNeg) error - // compileCeil adds instructions to replace the top value of float type on the stack with its ceiling value. - // For example, stack [..., 1.123] results in [..., 2.0]. This is equivalent to math.Ceil. - // See wasm.OpcodeF32Ceil wasm.OpcodeF64Ceil + // compileCeil adds instructions to perform wazeroir.OperationCeil. compileCeil(o *wazeroir.OperationCeil) error - // compileFloor adds instructions to replace the top value of float type on the stack with its floor value. - // For example, stack [..., 1.123] results in [..., 1.0]. This is equivalent to math.Floor. - // See wasm.OpcodeF32Floor wasm.OpcodeF64Floor + // compileFloor adds instructions to perform wazeroir.OperationFloor. compileFloor(o *wazeroir.OperationFloor) error - // compileTrunc adds instructions to replace the top value of float type on the stack with its truncated value. - // For example, stack [..., 1.9] results in [..., 1.0]. This is equivalent to math.Trunc. - // See wasm.OpcodeF32Trunc wasm.OpcodeF64Trunc + // compileTrunc adds instructions to perform wazeroir.OperationTrunc. compileTrunc(o *wazeroir.OperationTrunc) error - // compileNearest adds instructions to replace the top value of float type on the stack with its nearest integer value. - // For example, stack [..., 1.9] results in [..., 2.0]. This is *not* equivalent to math.Round and instead has the same - // the semantics of LLVM's rint intrinsic. See https://llvm.org/docs/LangRef.html#llvm-rint-intrinsic. - // For example, math.Round(-4.5) produces -5 while we want to produce -4. - // See wasm.OpcodeF32Nearest wasm.OpcodeF64Nearest + // compileNearest adds instructions to perform wazeroir.OperationNearest. compileNearest(o *wazeroir.OperationNearest) error - // compileSqrt adds instructions to replace the top value of float type on the stack with its square root. - // For example, stack [..., 9.0] results in [..., 3.0]. This is equivalent to "math.Sqrt". - // See wasm.OpcodeF32Sqrt wasm.OpcodeF64Sqrt + // compileSqrt adds instructions perform wazeroir.OperationSqrt. compileSqrt(o *wazeroir.OperationSqrt) error - // compileMin adds instructions to pop two values from the stack, and push back the maximum of - // these two values onto the stack. 
For example, stack [..., 100.1, 1.9] results in [..., 1.9]. - // Note: WebAssembly specifies that min/max must always return NaN if one of values is NaN, - // which is a different behavior different from math.Min. - // See wasm.OpcodeF32Min wasm.OpcodeF64Min + // compileMin adds instructions to perform wazeroir.OperationMin. compileMin(o *wazeroir.OperationMin) error - // compileMax adds instructions to pop two values from the stack, and push back the maximum of - // these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 100.1]. - // Note: WebAssembly specifies that min/max must always return NaN if one of values is NaN, - // which is a different behavior different from math.Max. - // See wasm.OpcodeF32Max wasm.OpcodeF64Max + // compileMax adds instructions to perform wazeroir.OperationMax. compileMax(o *wazeroir.OperationMax) error - // compileCopysign adds instructions to pop two float values from the stack, and copy the signbit of - // the first-popped value to the last one. - // For example, stack [..., 1.213, -5.0] results in [..., -1.213]. - // See wasm.OpcodeF32Copysign wasm.OpcodeF64Copysign + // compileCopysign adds instructions to perform wazeroir.OperationCopysign. compileCopysign(o *wazeroir.OperationCopysign) error - // compileI32WrapFromI64 adds instructions to replace the 64-bit int on top of the stack - // with the corresponding 32-bit integer. This is equivalent to uint64(uint32(v)) in Go. - // See wasm.OpcodeI32WrapI64. + // compileI32WrapFromI64 adds instructions to perform wazeroir.OperationI32WrapFromI64. compileI32WrapFromI64() error - // compileITruncFromF adds instructions to replace the top value of float type on the stack with - // the corresponding int value. This is equivalent to int32(math.Trunc(float32(x))), uint32(math.Trunc(float64(x))), etc in Go. - // - // Please refer to [1] and [2] for when we encounter undefined behavior in the WebAssembly specification. - // To summarize, if the source float value is NaN or doesn't fit in the destination range of integers (incl. +=Inf), - // then the runtime behavior is undefined. In wazero, we exit the function in these undefined cases with - // nativeCallStatusCodeInvalidFloatToIntConversion or nativeCallStatusIntegerOverflow status code. - // [1] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-umathrmtruncmathsfu_m-n-z for unsigned integers. - // [2] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-smathrmtruncmathsfs_m-n-z for signed integers. - // See OpcodeI32TruncF32S OpcodeI32TruncF32U OpcodeI32TruncF64S OpcodeI32TruncF64U - // See OpcodeI64TruncF32S OpcodeI64TruncF32U OpcodeI64TruncF64S OpcodeI64TruncF64U + // compileITruncFromF adds instructions to perform wazeroir.OperationITruncFromF. compileITruncFromF(o *wazeroir.OperationITruncFromF) error - // compileFConvertFromI adds instructions to replace the top value of int type on the stack with - // the corresponding float value. This is equivalent to float32(uint32(x)), float32(int32(x)), etc in Go. - // See OpcodeI32ConvertF32S OpcodeI32ConvertF32U OpcodeI32ConvertF64S OpcodeI32ConvertF64U - // See OpcodeI64ConvertF32S OpcodeI64ConvertF32U OpcodeI64ConvertF64S OpcodeI64ConvertF64U + // compileFConvertFromI adds instructions to perform wazeroir.OperationFConvertFromI. compileFConvertFromI(o *wazeroir.OperationFConvertFromI) error - // compileF32DemoteFromF64 adds instructions to replace the 64-bit float on top of the stack - // with the corresponding 32-bit float.
This is equivalent to float32(float64(v)) in Go. - // See wasm.OpcodeF32DemoteF64 + // compileF32DemoteFromF64 adds instructions to perform wazeroir.OperationF32DemoteFromF64. compileF32DemoteFromF64() error - // compileF64PromoteFromF32 adds instructions to replace the 32-bit float on top of the stack - // with the corresponding 64-bit float. This is equivalent to float64(float32(v)) in Go. - // See wasm.OpcodeF64PromoteF32 + // compileF64PromoteFromF32 adds instructions to perform wazeroir.OperationF64PromoteFromF32. compileF64PromoteFromF32() error - // compileI32ReinterpretFromF32 adds instructions to reinterpret the 32-bit float on top of the stack - // as a 32-bit integer by preserving the bit representation. If the value is on the stack, - // this is no-op as there is nothing to do for converting type. - // See wasm.OpcodeI32ReinterpretF32. + // compileI32ReinterpretFromF32 adds instructions to perform wazeroir.OperationI32ReinterpretFromF32. compileI32ReinterpretFromF32() error - // compileI64ReinterpretFromF64 adds instructions to reinterpret the 64-bit float on top of the stack - // as a 64-bit integer by preserving the bit representation. - // See wasm.OpcodeI64ReinterpretF64. + // compileI64ReinterpretFromF64 adds instructions to perform wazeroir.OperationI64ReinterpretFromF64. compileI64ReinterpretFromF64() error - // compileF32ReinterpretFromI32 adds instructions to reinterpret the 32-bit int on top of the stack - // as a 32-bit float by preserving the bit representation. - // See wasm.OpcodeF32ReinterpretI32. + // compileF32ReinterpretFromI32 adds instructions to perform wazeroir.OperationF32ReinterpretFromI32. compileF32ReinterpretFromI32() error - // compileF64ReinterpretFromI64 adds instructions to reinterpret the 64-bit int on top of the stack - // as a 64-bit float by preserving the bit representation. - // See wasm.OpcodeF64ReinterpretI64. + // compileF64ReinterpretFromI64 adds instructions to perform wazeroir.OperationF64ReinterpretFromI64. compileF64ReinterpretFromI64() error - // compileExtend adds instructions to extend the 32-bit signed or unsigned int on top of the stack - // as a 64-bit integer of corresponding signedness. For unsigned case, this is just reinterpreting the - // underlying bit pattern as 64-bit integer. For signed case, this is sign-extension which preserves the - // original integer's sign. - // See wasm.OpcodeI64ExtendI32S wasm.OpcodeI64ExtendI32U + // compileExtend adds instructions to perform wazeroir.OperationExtend. compileExtend(o *wazeroir.OperationExtend) error - // compileEq adds instructions to pop two values from the stack and push 1 if they equal otherwise 0. - // See wasm.OpcodeI32Eq wasm.OpcodeI64Eq + // compileEq adds instructions to perform wazeroir.OperationEq. compileEq(o *wazeroir.OperationEq) error - // compileEq adds instructions to pop two values from the stack and push 0 if they equal otherwise 1. - // See wasm.OpcodeI32Ne wasm.OpcodeI64Ne + // compileNe adds instructions to perform wazeroir.OperationNe. compileNe(o *wazeroir.OperationNe) error - // compileEq adds instructions to pop a value from the stack and push 1 if it equals zero, 0. - // See wasm.OpcodeI32Eqz wasm.OpcodeI64Eqz + // compileEqz adds instructions to perform wazeroir.OperationEqz. compileEqz(o *wazeroir.OperationEqz) error - // compileLt adds instructions to pop two values from the stack and push 1 if the second is less than the top one. Otherwise 0.
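Aside: the removed comments spelled out that each comparison pops two operands and pushes an i32 flag (1 when the relation holds, else 0). A minimal sketch of that contract for signed i32 less-than (illustrative only, not wazero code):

package main

import "fmt"

// i32LtS models the value-level behavior behind compileLt for signed
// 32-bit integers: pop two operands, push an i32 flag.
func i32LtS(x1, x2 int32) uint32 {
	if x1 < x2 {
		return 1
	}
	return 0
}

func main() {
	fmt.Println(i32LtS(-1, 0)) // 1: signed comparison, -1 < 0
	fmt.Println(i32LtS(0, -1)) // 0
}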
- // See wasm.OpcodeI32Lt wasm.OpcodeI64Lt + // compileLt adds instructions to perform wazeroir.OperationLt. compileLt(o *wazeroir.OperationLt) error - // compileGt adds instructions to pop two values from the stack and push 1 if the second is greater than the top one. Otherwise 0. - // See wasm.OpcodeI32Gt wasm.OpcodeI64Gt + // compileGt adds instructions to perform wazeroir.OperationGt. compileGt(o *wazeroir.OperationGt) error - // compileLe adds instructions to pop two values from the stack and push 1 if the second is less than or equals the top one. Otherwise 0. - // See wasm.OpcodeI32Le wasm.OpcodeI64Le + // compileLe adds instructions to perform wazeroir.OperationLe. compileLe(o *wazeroir.OperationLe) error - // compileLe adds instructions to pop two values from the stack and push 1 if the second is greater than or equals the top one. Otherwise 0. - // See wasm.OpcodeI32Ge wasm.OpcodeI64Ge + // compileGe adds instructions to perform wazeroir.OperationGe. compileGe(o *wazeroir.OperationGe) error - // compileLoad adds instructions to perform load instruction in WebAssembly. - // See wasm.OpcodeI32Load wasm.OpcodeI64Load wasm.OpcodeF32Load wasm.OpcodeF64Load + // compileLoad adds instructions to perform wazeroir.OperationLoad. compileLoad(o *wazeroir.OperationLoad) error - // compileLoad8 adds instructions to perform load8 instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds if out-of-bounds access happens. - // See wasm.OpcodeI32Load8S wasm.OpcodeI32Load8U wasm.OpcodeI64Load8S wasm.OpcodeI64Load8U + // compileLoad8 adds instructions to perform wazeroir.OperationLoad8. compileLoad8(o *wazeroir.OperationLoad8) error - // compileLoad16 adds instructions to perform load16 instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds if out-of-bounds access happens. - // See wasm.OpcodeI32Load16S wasm.OpcodeI32Load16U wasm.OpcodeI64Load16S wasm.OpcodeI64Load16U + // compileLoad16 adds instructions to perform wazeroir.OperationLoad16. compileLoad16(o *wazeroir.OperationLoad16) error - // compileLoad32 adds instructions to perform load32 instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds - // if out-of-bounds access happens. - // See wasm.OpcodeI64Load32S wasm.OpcodeI64Load32U + // compileLoad32 adds instructions to perform wazeroir.OperationLoad32. compileLoad32(o *wazeroir.OperationLoad32) error - // compileStore adds instructions to perform store instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds - // if out-of-bounds access happens. - // See wasm.OpcodeI32Store wasm.OpcodeI64Store wasm.OpcodeF32Store wasm.OpcodeF64Store + // compileStore adds instructions to perform wazeroir.OperationStore. compileStore(o *wazeroir.OperationStore) error - // compileStore8 adds instructions to perform store8 instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds - // if out-of-bounds access happens. - // See wasm.OpcodeI32Store8S wasm.OpcodeI32Store8U wasm.OpcodeI64Store8S wasm.OpcodeI64Store8U + // compileStore8 adds instructions to perform wazeroir.OperationStore8.
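Aside: the removed load/store comments document a runtime bounds check that exits with nativeCallStatusCodeMemoryOutOfBounds on out-of-bounds access. A plain-Go sketch of such a check follows; loadU32 is a made-up name for illustration, not a wazero helper:

package main

import (
	"encoding/binary"
	"fmt"
)

// loadU32 models the guard a compiled 32-bit load performs: compute the
// effective address in 64 bits so the sum cannot overflow, then verify
// the full access stays inside linear memory.
func loadU32(mem []byte, base, offset uint32) (uint32, bool) {
	ea := uint64(base) + uint64(offset)
	if ea+4 > uint64(len(mem)) {
		return 0, false // compiled code exits with the out-of-bounds status here
	}
	return binary.LittleEndian.Uint32(mem[ea : ea+4]), true
}

func main() {
	mem := []byte{1, 0, 0, 0, 2, 0, 0, 0}
	v, ok := loadU32(mem, 4, 0)
	fmt.Println(v, ok) // 2 true
	_, ok = loadU32(mem, 8, 0)
	fmt.Println(ok) // false: 4 bytes at address 8 overrun an 8-byte memory
}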
compileStore8(o *wazeroir.OperationStore8) error - // compileStore16 adds instructions to perform store16 instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds - // if out-of-bounds access happens. - // See wasm.OpcodeI32Store16S wasm.OpcodeI32Store16U wasm.OpcodeI64Store16S wasm.OpcodeI64Store16U + // compileStore16 adds instructions to perform wazeroir.OperationStore16. compileStore16(o *wazeroir.OperationStore16) error - // compileStore32 adds instructions to perform store32 instruction in WebAssembly. - // The resulting code checks the memory boundary at runtime, and exit the function with nativeCallStatusCodeMemoryOutOfBounds - // if out-of-bounds access happens. - // See wasm.OpcodeI64Store32S wasm.OpcodeI64Store32U + // compileStore32 adds instructions to perform wazeroir.OperationStore32. compileStore32(o *wazeroir.OperationStore32) error - // compileMemorySize adds instruction to pop a value from the stack, grow the memory buffer according to the value, - // and push the previous page size onto the stack. - // See wasm.OpcodeMemoryGrow + // compileMemoryGrow adds instruction to perform wazeroir.OperationMemoryGrow. compileMemoryGrow() error - // compileMemorySize adds instruction to read the current page size of memory instance and push it onto the stack. - // See wasm.OpcodeMemorySize + // compileMemorySize adds instruction to perform wazeroir.OperationMemorySize. compileMemorySize() error - // compileConstI32 adds instruction to push the given constant i32 value onto the stack. - // See wasm.OpcodeI32Const + // compileConstI32 adds instruction to perform wazeroir.OperationConstI32. compileConstI32(o *wazeroir.OperationConstI32) error - // compileConstI32 adds instruction to push the given constant i64 value onto the stack. - // See wasm.OpcodeI64Const + // compileConstI64 adds instruction to perform wazeroir.OperationConstI64. compileConstI64(o *wazeroir.OperationConstI64) error - // compileConstI32 adds instruction to push the given constant f32 value onto the stack. - // See wasm.OpcodeF32Const + // compileConstF32 adds instruction to perform wazeroir.OperationConstF32. compileConstF32(o *wazeroir.OperationConstF32) error - // compileConstI32 adds instruction to push the given constant f64 value onto the stack. - // See wasm.OpcodeF64Const + // compileConstF64 adds instruction to perform wazeroir.OperationConstF64. compileConstF64(o *wazeroir.OperationConstF64) error - // compileSignExtend32From8 adds instruction to sign-extends the first 8-bits of 32-bit in as signed 32-bit int. - // See wasm.OpcodeI32Extend8S + // compileSignExtend32From8 adds instructions to perform wazeroir.OperationSignExtend32From8. compileSignExtend32From8() error - // compileSignExtend32From16 adds instruction to sign-extends the first 16-bits of 32-bit in as signed 32-bit int. - // See wasm.OpcodeI32Extend16S + // compileSignExtend32From16 adds instructions to perform wazeroir.OperationSignExtend32From16. compileSignExtend32From16() error - // compileSignExtend64From8 adds instruction to sign-extends the first 8-bits of 64-bit in as signed 64-bit int. - // See wasm.OpcodeI64Extend8S + // compileSignExtend64From8 adds instructions to perform wazeroir.OperationSignExtend64From8. compileSignExtend64From8() error - // compileSignExtend64From16 adds instruction to sign-extends the first 16-bits of 64-bit in as signed 64-bit int.
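Aside: the sign-extension methods above amount to narrowing-then-widening conversions. A minimal sketch under that reading (illustrative only, not wazero code):

package main

import "fmt"

// signExtend32From8 models wazeroir.OperationSignExtend32From8: the low
// 8 bits are reinterpreted as a signed int8, then widened back to 32 bits.
func signExtend32From8(v uint32) uint32 { return uint32(int32(int8(v))) }

// signExtend64From16 models wazeroir.OperationSignExtend64From16.
func signExtend64From16(v uint64) uint64 { return uint64(int64(int16(v))) }

func main() {
	fmt.Printf("%#x\n", signExtend32From8(0xff))    // 0xffffffff
	fmt.Printf("%#x\n", signExtend64From16(0x8000)) // 0xffffffffffff8000
}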
- // See wasm.OpcodeI64Extend16S + // compileSignExtend64From16 adds instructions to perform wazeroir.OperationSignExtend64From16. compileSignExtend64From16() error - // compileSignExtend64From32 adds instruction to sign-extends the first 32-bits of 64-bit in as signed 64-bit int. - // See wasm.OpcodeI64Extend32S + // compileSignExtend64From32 adds instructions to perform wazeroir.OperationSignExtend64From32. compileSignExtend64From32() error - // compileMemoryInit adds instructions to perform operations corresponding to the wasm.OpcodeMemoryInitName instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileMemoryInit adds instructions to perform wazeroir.OperationMemoryInit. compileMemoryInit(*wazeroir.OperationMemoryInit) error - // compileDataDrop adds instructions to perform operations corresponding to the wasm.OpcodeDataDropName instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileDataDrop adds instructions to perform wazeroir.OperationDataDrop. compileDataDrop(*wazeroir.OperationDataDrop) error - // compileMemoryCopy adds instructions to perform operations corresponding to the wasm.OpcodeMemoryCopylName instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileMemoryCopy adds instructions to perform wazeroir.OperationMemoryCopy. compileMemoryCopy() error - // compileMemoryCopy adds instructions to perform operations corresponding to the wasm.OpcodeMemoryFillName instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileMemoryFill adds instructions to perform wazeroir.OperationMemoryFill. compileMemoryFill() error - // compileTableInit adds instructions to perform operations corresponding to the wasm.OpcodeTableInit instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileTableInit adds instructions to perform wazeroir.OperationTableInit. compileTableInit(*wazeroir.OperationTableInit) error - // compileTableCopy adds instructions to perform operations corresponding to the wasm.OpcodeTableCopy instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileTableCopy adds instructions to perform wazeroir.OperationTableCopy. compileTableCopy(*wazeroir.OperationTableCopy) error - // compileElemDrop adds instructions to perform operations corresponding to the wasm.OpcodeElemDrop instruction in - // wasm.FeatureBulkMemoryOperations. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + // compileElemDrop adds instructions to perform wazeroir.OperationElemDrop. compileElemDrop(*wazeroir.OperationElemDrop) error - // compileRefFunc adds instructions to perform operations corresponding to wasm.OpcodeRefFunc instruction introduced in - // wasm.FeatureReferenceTypes. - // - // Note: in wazero, we express any reference types (funcref or externref) as opaque pointers which is uint64. 
- // Therefore, the compilers implementations emit instructions to push the address of *function onto the stack. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/instructions.html#xref-syntax-instructions-syntax-instr-ref-mathsf-ref-func-x + // compileRefFunc adds instructions to perform wazeroir.OperationRefFunc. compileRefFunc(*wazeroir.OperationRefFunc) error - // compileTableGet adds instructions to perform operations corresponding to wasm.OpcodeTableGet instruction introduced in - // wasm.FeatureReferenceTypes. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/instructions.html#xref-syntax-instructions-syntax-instr-table-mathsf-table-get-x + // compileTableGet adds instructions to perform wazeroir.OperationTableGet. compileTableGet(*wazeroir.OperationTableGet) error - // compileTableSet adds instructions to perform operations corresponding to wasm.OpcodeTableSet instruction introduced in - // wasm.FeatureReferenceTypes. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/instructions.html#xref-syntax-instructions-syntax-instr-table-mathsf-table-set-x + // compileTableSet adds instructions to perform wazeroir.OperationTableSet. compileTableSet(*wazeroir.OperationTableSet) error - // compileTableGrow adds instructions to perform operations corresponding to wasm.OpcodeMiscTableGrow instruction introduced in - // wasm.FeatureReferenceTypes. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/instructions.html#xref-syntax-instructions-syntax-instr-table-mathsf-table-grow-x + // compileTableGrow adds instructions to perform wazeroir.OperationTableGrow. compileTableGrow(*wazeroir.OperationTableGrow) error - // compileTableSize adds instructions to perform operations corresponding to wasm.OpcodeMiscTableSize instruction introduced in - // wasm.FeatureReferenceTypes. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/instructions.html#xref-syntax-instructions-syntax-instr-table-mathsf-table-size-x + // compileTableSize adds instructions to perform wazeroir.OperationTableSize. compileTableSize(*wazeroir.OperationTableSize) error - // compileTableFill adds instructions to perform operations corresponding to wasm.OpcodeMiscTableFill instruction introduced in - // wasm.FeatureReferenceTypes. - // - // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/instructions.html#xref-syntax-instructions-syntax-instr-table-mathsf-table-fill-x + // compileTableFill adds instructions to perform wazeroir.OperationTableFill. compileTableFill(*wazeroir.OperationTableFill) error - // compileV128Const adds instructions to push a constant V128 value onto the stack. - // See wasm.OpcodeVecV128Const + // compileV128Const adds instructions to perform wazeroir.OperationV128Const. compileV128Const(*wazeroir.OperationV128Const) error - // compileV128Add adds instruction to add two vector values whose shape is specified as `o.Shape`. - // See wasm.OpcodeVecI8x16Add wasm.OpcodeVecI16x8Add wasm.OpcodeVecI32x4Add wasm.OpcodeVecI64x2Add wasm.OpcodeVecF32x4Add wasm.OpcodeVecF64x2Add + // compileV128Add adds instructions to perform wazeroir.OperationV128Add. compileV128Add(o *wazeroir.OperationV128Add) error - // compileV128Sub adds instruction to subtract two vector values whose shape is specified as `o.Shape`. - // See wasm.OpcodeVecI8x16Sub wasm.OpcodeVecI16x8Sub wasm.OpcodeVecI32x4Sub wasm.OpcodeVecI64x2Sub wasm.OpcodeVecF32x4Sub wasm.OpcodeVecF64x2Sub + // compileV128Sub adds instructions to perform wazeroir.OperationV128Sub. 
compileV128Sub(o *wazeroir.OperationV128Sub) error - // compileV128Load adds instruction to perform vector load kind instructions. - // See wasm.OpcodeVecV128Load* instructions. + // compileV128Load adds instructions to perform wazeroir.OperationV128Load. compileV128Load(o *wazeroir.OperationV128Load) error - // compileV128LoadLane adds instructions which are equivalent to wasm.OpcodeVecV128LoadXXLane instructions. - // See wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName + // compileV128LoadLane adds instructions to perform wazeroir.OperationV128LoadLane. compileV128LoadLane(o *wazeroir.OperationV128LoadLane) error - // compileV128Store adds instructions which are equivalent to wasm.OpcodeVecV128StoreName. + // compileV128Store adds instructions to perform wazeroir.OperationV128Store. compileV128Store(o *wazeroir.OperationV128Store) error - // compileV128StoreLane adds instructions which are equivalent to wasm.OpcodeVecV128StoreXXLane instructions. - // See wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. + // compileV128StoreLane adds instructions to perform wazeroir.OperationV128StoreLane. compileV128StoreLane(o *wazeroir.OperationV128StoreLane) error - // compileV128ExtractLane adds instructions which are equivalent to wasm.OpcodeVecXXXXExtractLane instructions. - // See wasm.OpcodeVecI8x16ExtractLaneSName wasm.OpcodeVecI8x16ExtractLaneUName wasm.OpcodeVecI16x8ExtractLaneSName wasm.OpcodeVecI16x8ExtractLaneUName - // wasm.OpcodeVecI32x4ExtractLaneName wasm.OpcodeVecI64x2ExtractLaneName wasm.OpcodeVecF32x4ExtractLaneName wasm.OpcodeVecF64x2ExtractLaneName. + // compileV128ExtractLane adds instructions to perform wazeroir.OperationV128ExtractLane. compileV128ExtractLane(o *wazeroir.OperationV128ExtractLane) error - // compileV128ReplaceLane adds instructions which are equivalent to wasm.OpcodeVecXXXXReplaceLane instructions. - // See wasm.OpcodeVecI8x16ReplaceLaneName wasm.OpcodeVecI16x8ReplaceLaneName wasm.OpcodeVecI32x4ReplaceLaneName wasm.OpcodeVecI64x2ReplaceLaneName - // wasm.OpcodeVecF32x4ReplaceLaneName wasm.OpcodeVecF64x2ReplaceLaneName. + // compileV128ReplaceLane adds instructions to perform wazeroir.OperationV128ReplaceLane. compileV128ReplaceLane(o *wazeroir.OperationV128ReplaceLane) error - // compileV128Splat adds instructions which are equivalent to wasm.OpcodeVecXXXSplat instructions. - // See wasm.OpcodeVecI8x16SplatName wasm.OpcodeVecI16x8SplatName wasm.OpcodeVecI32x4SplatName wasm.OpcodeVecI64x2SplatName - // wasm.OpcodeVecF32x4SplatName wasm.OpcodeVecF64x2SplatName. + // compileV128Splat adds instructions to perform wazeroir.OperationV128Splat. compileV128Splat(o *wazeroir.OperationV128Splat) error - // compileV128Shuffle adds instructions which are equivalent to wasm.OpcodeVecV128i8x16ShuffleName instruction. + // compileV128Shuffle adds instructions to perform wazeroir.OperationV128Shuffle. compileV128Shuffle(o *wazeroir.OperationV128Shuffle) error - // compileV128Swizzle adds instructions which are equivalent to wasm.OpcodeVecI8x16SwizzleName instruction. + // compileV128Swizzle adds instructions to perform wazeroir.OperationV128Swizzle. compileV128Swizzle(o *wazeroir.OperationV128Swizzle) error - // compileV128Swizzle adds instructions which are equivalent to wasm.OpcodeVecV128AnyTrueName instruction.
+ // compileV128AnyTrue adds instructions to perform wazeroir.OperationV128AnyTrue. compileV128AnyTrue(o *wazeroir.OperationV128AnyTrue) error - // compileV128AllTrue adds instructions which are equivalent to wasm.OpcodeVecXXXAllTrue instructions. - // See wasm.OpcodeVecI8x16AllTrueName wasm.OpcodeVecI16x8AllTrueName wasm.OpcodeVecI32x4AllTrueName wasm.OpcodeVecI64x2AllTrueName. + // compileV128AllTrue adds instructions to perform wazeroir.OperationV128AllTrue. compileV128AllTrue(o *wazeroir.OperationV128AllTrue) error - // compileV128BitMask adds instructions which are equivalent to wasm.OpcodeVecV128XXXBitMask instruction. - // See wasm.OpcodeVecI8x16BitMaskName wasm.OpcodeVecI16x8BitMaskName wasm.OpcodeVecI32x4BitMaskName wasm.OpcodeVecI64x2BitMaskName. + // compileV128BitMask adds instructions to perform wazeroir.OperationV128BitMask. compileV128BitMask(*wazeroir.OperationV128BitMask) error - // compileV128And adds instructions which are equivalent to wasm.OpcodeVecV128AndName instruction. - // See wasm.OpcodeVecV128AndName. + // compileV128And adds instructions to perform wazeroir.OperationV128And. compileV128And(*wazeroir.OperationV128And) error - // compileV128Not adds instructions which are equivalent to wasm.OpcodeVecV128NotName instruction. - // See wasm.OpcodeVecV128NotName. + // compileV128Not adds instructions to perform wazeroir.OperationV128Not. compileV128Not(*wazeroir.OperationV128Not) error - // compileV128Or adds instructions which are equivalent to wasm.OpcodeVecV128OrName instruction. - // See wasm.OpcodeVecV128OrName. + // compileV128Or adds instructions to perform wazeroir.OperationV128Or. compileV128Or(*wazeroir.OperationV128Or) error - // compileV128Xor adds instructions which are equivalent to wasm.OpcodeVecV128XorName instruction. - // See wasm.OpcodeVecV128XorName. + // compileV128Xor adds instructions to perform wazeroir.OperationV128Xor. compileV128Xor(*wazeroir.OperationV128Xor) error - // compileV128Bitselect adds instructions which are equivalent to wasm.OpcodeVecV128BitselectName instruction. - // See wasm.OpcodeVecV128BitselectName. + // compileV128Bitselect adds instructions to perform wazeroir.OperationV128Bitselect. compileV128Bitselect(*wazeroir.OperationV128Bitselect) error - // compileV128AndNot adds instructions which are equivalent to wasm.OpcodeVecV128AndNotName instruction. - // See wasm.OpcodeVecV128AndNotName. + // compileV128AndNot adds instructions to perform wazeroir.OperationV128AndNot. compileV128AndNot(*wazeroir.OperationV128AndNot) error - // compileV128Shr adds instructions which are equivalent to wasm.OpcodeVecXXXShrYYYY instructions. - // See wasm.OpcodeVecI8x16ShrSName wasm.OpcodeVecI8x16ShrUName wasm.OpcodeVecI16x8ShrSName - // wasm.OpcodeVecI16x8ShrUName wasm.OpcodeVecI32x4ShrSName wasm.OpcodeVecI32x4ShrUName. - // wasm.OpcodeVecI64x2ShrSName wasm.OpcodeVecI64x2ShrUName. + // compileV128Shr adds instructions to perform wazeroir.OperationV128Shr. compileV128Shr(*wazeroir.OperationV128Shr) error - // compileV128Shl adds instructions which are equivalent to wasm.OpcodeVecXXXShl instructions. - // See wasm.OpcodeVecI8x16ShlName wasm.OpcodeVecI16x8ShlName wasm.OpcodeVecI32x4ShlName wasm.OpcodeVecI64x2ShlName + // compileV128Shl adds instructions to perform wazeroir.OperationV128Shl. compileV128Shl(*wazeroir.OperationV128Shl) error - // compileV128Cmp adds instructions which are equivalent to various vector comparison instructions. 
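Aside: among the V128 bitwise methods above, bitselect is the least obvious; per the spec it computes (x1 AND c) OR (x2 AND NOT c) bitwise. A sketch (illustrative only, not wazero code):

package main

import "fmt"

// bitselect picks each result bit from x1 where the mask bit is 1 and
// from x2 where it is 0, matching wasm v128.bitselect.
func bitselect(x1, x2, c [16]byte) (out [16]byte) {
	for i := range out {
		out[i] = (x1[i] & c[i]) | (x2[i] &^ c[i])
	}
	return
}

func main() {
	var x1, x2, c [16]byte
	x1[0], x2[0], c[0] = 0xAA, 0x55, 0xF0
	fmt.Printf("%#x\n", bitselect(x1, x2, c)[0]) // 0xa5: high nibble from x1, low from x2
}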
- // See wasm.OpcodeVecI8x16EqName, wasm.OpcodeVecI8x16NeName, wasm.OpcodeVecI8x16LtSName, wasm.OpcodeVecI8x16LtUName, wasm.OpcodeVecI8x16GtSName, - // wasm.OpcodeVecI8x16GtUName, wasm.OpcodeVecI8x16LeSName, wasm.OpcodeVecI8x16LeUName, wasm.OpcodeVecI8x16GeSName, wasm.OpcodeVecI8x16GeUName, - // wasm.OpcodeVecI16x8EqName, wasm.OpcodeVecI16x8NeName, wasm.OpcodeVecI16x8LtSName, wasm.OpcodeVecI16x8LtUName, wasm.OpcodeVecI16x8GtSName, - // wasm.OpcodeVecI16x8GtUName, wasm.OpcodeVecI16x8LeSName, wasm.OpcodeVecI16x8LeUName, wasm.OpcodeVecI16x8GeSName, wasm.OpcodeVecI16x8GeUName, - // wasm.OpcodeVecI32x4EqName, wasm.OpcodeVecI32x4NeName, wasm.OpcodeVecI32x4LtSName, wasm.OpcodeVecI32x4LtUName, wasm.OpcodeVecI32x4GtSName, - // wasm.OpcodeVecI32x4GtUName, wasm.OpcodeVecI32x4LeSName, wasm.OpcodeVecI32x4LeUName, wasm.OpcodeVecI32x4GeSName, wasm.OpcodeVecI32x4GeUName, - // wasm.OpcodeVecI64x2EqName, wasm.OpcodeVecI64x2NeName, wasm.OpcodeVecI64x2LtSName, wasm.OpcodeVecI64x2GtSName, wasm.OpcodeVecI64x2LeSName, - // wasm.OpcodeVecI64x2GeSName, wasm.OpcodeVecF32x4EqName, wasm.OpcodeVecF32x4NeName, wasm.OpcodeVecF32x4LtName, wasm.OpcodeVecF32x4GtName, - // wasm.OpcodeVecF32x4LeName, wasm.OpcodeVecF32x4GeName, wasm.OpcodeVecF64x2EqName, wasm.OpcodeVecF64x2NeName, wasm.OpcodeVecF64x2LtName, - // wasm.OpcodeVecF64x2GtName, wasm.OpcodeVecF64x2LeName, wasm.OpcodeVecF64x2GeName + // compileV128Cmp adds instructions to perform wazeroir.OperationV128Cmp. compileV128Cmp(*wazeroir.OperationV128Cmp) error + // compileV128AddSat adds instructions to perform wazeroir.OperationV128AddSat. + compileV128AddSat(*wazeroir.OperationV128AddSat) error + // compileV128SubSat adds instructions to perform wazeroir.OperationV128SubSat. + compileV128SubSat(*wazeroir.OperationV128SubSat) error + // compileV128Mul adds instructions to perform wazeroir.OperationV128Mul. + compileV128Mul(*wazeroir.OperationV128Mul) error + // compileV128Div adds instructions to perform wazeroir.OperationV128Div. + compileV128Div(*wazeroir.OperationV128Div) error + // compileV128Neg adds instructions to perform wazeroir.OperationV128Neg. + compileV128Neg(*wazeroir.OperationV128Neg) error + // compileV128Sqrt adds instructions to perform wazeroir.OperationV128Sqrt. + compileV128Sqrt(*wazeroir.OperationV128Sqrt) error + // compileV128Abs adds instructions to perform wazeroir.OperationV128Abs. + compileV128Abs(*wazeroir.OperationV128Abs) error + // compileV128Popcnt adds instructions to perform wazeroir.OperationV128Popcnt. + compileV128Popcnt(*wazeroir.OperationV128Popcnt) error + // compileV128Min adds instructions to perform wazeroir.OperationV128Min. + compileV128Min(*wazeroir.OperationV128Min) error + // compileV128Max adds instructions to perform wazeroir.OperationV128Max. + compileV128Max(*wazeroir.OperationV128Max) error + // compileV128AvgrU adds instructions to perform wazeroir.OperationV128AvgrU. + compileV128AvgrU(*wazeroir.OperationV128AvgrU) error + // compileV128Pmin adds instructions to perform wazeroir.OperationV128Pmin. + compileV128Pmin(*wazeroir.OperationV128Pmin) error + // compileV128Pmax adds instructions to perform wazeroir.OperationV128Pmax. + compileV128Pmax(*wazeroir.OperationV128Pmax) error + // compileV128Ceil adds instructions to perform wazeroir.OperationV128Ceil. + compileV128Ceil(*wazeroir.OperationV128Ceil) error + // compileV128Floor adds instructions to perform wazeroir.OperationV128Floor. + compileV128Floor(*wazeroir.OperationV128Floor) error + // compileV128Trunc adds instructions to perform wazeroir.OperationV128Trunc. 
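Aside: compileV128AddSat/compileV128SubSat introduce saturating lane arithmetic, where results clamp at the lane type's bounds instead of wrapping. A sketch of the signed 8-bit rule (illustrative only, not wazero code):

package main

import (
	"fmt"
	"math"
)

// addSatI8 widens to int16 so the true sum is representable, then clamps
// to the int8 range, as wasm i8x16.add_sat_s does per lane.
func addSatI8(a, b int8) int8 {
	s := int16(a) + int16(b)
	if s > math.MaxInt8 {
		return math.MaxInt8
	}
	if s < math.MinInt8 {
		return math.MinInt8
	}
	return int8(s)
}

func main() {
	fmt.Println(addSatI8(100, 100))   // 127, not the wrapped -56
	fmt.Println(addSatI8(-100, -100)) // -128, not the wrapped 56
}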
+ compileV128Trunc(*wazeroir.OperationV128Trunc) error + // compileV128Nearest adds instructions to perform wazeroir.OperationV128Nearest. + compileV128Nearest(*wazeroir.OperationV128Nearest) error + // compileV128Extend adds instructions to perform wazeroir.OperationV128Extend. + compileV128Extend(*wazeroir.OperationV128Extend) error + // compileV128ExtMul adds instructions to perform wazeroir.OperationV128ExtMul. + compileV128ExtMul(*wazeroir.OperationV128ExtMul) error + // compileV128Q15mulrSatS adds instructions to perform wazeroir.OperationV128Q15mulrSatS. + compileV128Q15mulrSatS(*wazeroir.OperationV128Q15mulrSatS) error + // compileV128ExtAddPairwise adds instructions to perform wazeroir.OperationV128ExtAddPairwise. + compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAddPairwise) error + // compileV128FloatPromote adds instructions to perform wazeroir.OperationV128FloatPromote. + compileV128FloatPromote(o *wazeroir.OperationV128FloatPromote) error + // compileV128FloatDemote adds instructions to perform wazeroir.OperationV128FloatDemote. + compileV128FloatDemote(o *wazeroir.OperationV128FloatDemote) error + // compileV128FConvertFromI adds instructions to perform wazeroir.OperationV128FConvertFromI. + compileV128FConvertFromI(o *wazeroir.OperationV128FConvertFromI) error + // compileV128Dot adds instructions to perform wazeroir.OperationV128Dot. + compileV128Dot(o *wazeroir.OperationV128Dot) error + // compileV128Narrow adds instructions to perform wazeroir.OperationV128Narrow. + compileV128Narrow(o *wazeroir.OperationV128Narrow) error + // compileV128ITruncSatFromF adds instructions to perform wazeroir.OperationV128ITruncSatFromF. + compileV128ITruncSatFromF(o *wazeroir.OperationV128ITruncSatFromF) error } diff --git a/internal/engine/compiler/compiler_vec_test.go b/internal/engine/compiler/compiler_vec_test.go index a658d72d051..3bd8a281089 100644 --- a/internal/engine/compiler/compiler_vec_test.go +++ b/internal/engine/compiler/compiler_vec_test.go @@ -6,16 +6,115 @@ import ( "runtime" "testing" + "github.com/tetratelabs/wazero/internal/moremath" "github.com/tetratelabs/wazero/internal/testing/require" "github.com/tetratelabs/wazero/internal/wasm" "github.com/tetratelabs/wazero/internal/wazeroir" ) func TestCompiler_compileV128Add(t *testing.T) { - // TODO + tests := []struct { + name string + shape wazeroir.Shape + x1, x2, exp [16]byte + }{ + { + name: "i8x16", + shape: wazeroir.ShapeI8x16, + x1: [16]byte{0: 1, 2: 10, 10: 10}, + x2: [16]byte{0: 10, 4: 5, 10: 5}, + exp: [16]byte{0: 11, 2: 10, 4: 5, 10: 15}, + }, + { + name: "i16x8", + shape: wazeroir.ShapeI16x8, + x1: i16x8(1123, 0, 123, 1, 1, 5, 8, 1), + x2: i16x8(0, 123, 123, 0, 1, 5, 9, 1), + exp: i16x8(1123, 123, 246, 1, 2, 10, 17, 2), + }, + { + name: "i32x4", + shape: wazeroir.ShapeI32x4, + x1: i32x4(i32ToU32(-123), 5, 4, math.MaxUint32), + x2: i32x4(i32ToU32(-10), 1, i32ToU32(-104), math.MaxUint32), + exp: i32x4(i32ToU32(-133), 6, i32ToU32(-100), math.MaxUint32-1), + }, + { + name: "i64x2", + shape: wazeroir.ShapeI64x2, + x1: i64x2(i64ToU64(math.MinInt64), 12345), + x2: i64x2(i64ToU64(-1), i64ToU64(-12345)), + exp: i64x2(i64ToU64(math.MinInt64)+i64ToU64(-1), 0), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(1.0, 123, float32(math.Inf(1)), float32(math.Inf(-1))), + x2: f32x4(51234.12341, 123, math.MaxFloat32, -123), + exp: f32x4(51235.12341, 246, float32(math.Inf(1)), float32(math.Inf(-1))), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(1.123, 
math.Inf(1)), + x2: f64x2(1.123, math.MinInt64), + exp: f64x2(2.246, math.Inf(1)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Add(&wazeroir.OperationV128Add{Shape: tc.shape}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } } func TestCompiler_compileV128Sub(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on arm64. + t.Skip() + } + tests := []struct { name string shape wazeroir.Shape @@ -28,7 +127,41 @@ func TestCompiler_compileV128Sub(t *testing.T) { x2: [16]byte{0: 10, 4: 5, 10: 5}, exp: [16]byte{0: i8ToU8(-9), 2: 10, 4: i8ToU8(-5), 10: 5}, }, - // TODO: add more cases.
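Aside: the test tables in this file build vectors with lane-packing helpers (i16x8, i32x4, i64x2, f32x4, f64x2) whose definitions fall outside this hunk. Judging from the call sites, they presumably pack lanes little-endian into a [16]byte, roughly like:

package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

// i16x8 packs eight 16-bit lanes little-endian, lane 0 first.
func i16x8(w1, w2, w3, w4, w5, w6, w7, w8 uint16) (ret [16]byte) {
	for i, w := range []uint16{w1, w2, w3, w4, w5, w6, w7, w8} {
		binary.LittleEndian.PutUint16(ret[i*2:], w)
	}
	return
}

// f32x4 packs four float32 lanes via their IEEE 754 bit patterns.
func f32x4(f1, f2, f3, f4 float32) (ret [16]byte) {
	for i, f := range []float32{f1, f2, f3, f4} {
		binary.LittleEndian.PutUint32(ret[i*4:], math.Float32bits(f))
	}
	return
}

func main() {
	fmt.Println(i16x8(1, 0, 0, 0, 0, 0, 0, 0)[0]) // 1
	fmt.Println(f32x4(1, 0, 0, 0)[3])             // 63 (0x3f), the top byte of float32(1)
}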
+ { + name: "i16x8", + shape: wazeroir.ShapeI16x8, + x1: i16x8(1123, 0, 123, 1, 1, 5, 8, 1), + x2: i16x8(0, 123, 123, 0, 1, 5, 9, 1), + exp: i16x8(1123, i16ToU16(-123), 0, 1, 0, 0, i16ToU16(-1), 0), + }, + { + name: "i32x4", + shape: wazeroir.ShapeI32x4, + x1: i32x4(i32ToU32(-123), 5, 4, math.MaxUint32), + x2: i32x4(i32ToU32(-10), 1, i32ToU32(-104), math.MaxUint32), + exp: i32x4(i32ToU32(-113), 4, 108, 0), + }, + { + name: "i64x2", + shape: wazeroir.ShapeI64x2, + x1: i64x2(i64ToU64(math.MinInt64), 12345), + x2: i64x2(i64ToU64(-1), i64ToU64(-12345)), + exp: i64x2(i64ToU64(math.MinInt64+1), 12345*2), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(1.0, 123, float32(math.Inf(1)), float32(math.Inf(-1))), + x2: f32x4(51234.12341, 123, math.MaxFloat32, -123), + exp: f32x4(-51233.12341, 0, float32(math.Inf(1)), float32(math.Inf(-1))), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(1.123, math.Inf(1)), + x2: f64x2(1.123, math.MinInt64), + exp: f64x2(0, math.Inf(1)), + }, } for _, tc := range tests { @@ -82,26 +215,26 @@ func TestCompiler_compileV128Load(t *testing.T) { tests := []struct { name string memSetupFn func(buf []byte) - loadType wazeroir.LoadV128Type + loadType wazeroir.V128LoadType offset uint32 exp [16]byte }{ { - name: "v128 offset=0", loadType: wazeroir.LoadV128Type128, offset: 0, + name: "v128 offset=0", loadType: wazeroir.V128LoadType128, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}) }, exp: [16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, }, { - name: "v128 offset=2", loadType: wazeroir.LoadV128Type128, offset: 2, + name: "v128 offset=2", loadType: wazeroir.V128LoadType128, offset: 2, memSetupFn: func(buf []byte) { copy(buf, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}) }, exp: [16]byte{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}, }, { - name: "8x8s offset=0", loadType: wazeroir.LoadV128Type8x8s, offset: 0, + name: "8x8s offset=0", loadType: wazeroir.V128LoadType8x8s, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 9, 10, @@ -113,7 +246,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "8x8s offset=3", loadType: wazeroir.LoadV128Type8x8s, offset: 3, + name: "8x8s offset=3", loadType: wazeroir.V128LoadType8x8s, offset: 3, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 9, 10, @@ -125,7 +258,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "8x8u offset=0", loadType: wazeroir.LoadV128Type8x8u, offset: 0, + name: "8x8u offset=0", loadType: wazeroir.V128LoadType8x8u, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 9, 10, @@ -137,7 +270,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "8x8i offset=3", loadType: wazeroir.LoadV128Type8x8u, offset: 3, + name: "8x8u offset=3", loadType: wazeroir.V128LoadType8x8u, offset: 3, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 9, 10, @@ -149,7 +282,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "16x4s offset=0", loadType: wazeroir.LoadV128Type16x4s, offset: 0, + name: "16x4s offset=0", loadType: wazeroir.V128LoadType16x4s, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 9, 10, @@ -164,7 +297,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name:
"16x4s offset=3", loadType: wazeroir.LoadV128Type16x4s, offset: 3, + name: "16x4s offset=3", loadType: wazeroir.V128LoadType16x4s, offset: 3, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 0xff, 0xff, 9, 10, @@ -179,7 +312,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "16x4u offset=0", loadType: wazeroir.LoadV128Type16x4u, offset: 0, + name: "16x4u offset=0", loadType: wazeroir.V128LoadType16x4u, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 9, 10, @@ -194,7 +327,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "16x4u offset=3", loadType: wazeroir.LoadV128Type16x4u, offset: 3, + name: "16x4u offset=3", loadType: wazeroir.V128LoadType16x4u, offset: 3, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 0xff, 0xff, 9, 10, @@ -209,7 +342,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "32x2s offset=0", loadType: wazeroir.LoadV128Type32x2s, offset: 0, + name: "32x2s offset=0", loadType: wazeroir.V128LoadType32x2s, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 10, @@ -222,7 +355,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "32x2s offset=2", loadType: wazeroir.LoadV128Type32x2s, offset: 2, + name: "32x2s offset=2", loadType: wazeroir.V128LoadType32x2s, offset: 2, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -235,7 +368,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "32x2u offset=0", loadType: wazeroir.LoadV128Type32x2u, offset: 0, + name: "32x2u offset=0", loadType: wazeroir.V128LoadType32x2u, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 10, @@ -248,7 +381,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "32x2u offset=2", loadType: wazeroir.LoadV128Type32x2u, offset: 2, + name: "32x2u offset=2", loadType: wazeroir.V128LoadType32x2u, offset: 2, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -261,7 +394,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "32zero offset=0", loadType: wazeroir.LoadV128Type32zero, offset: 0, + name: "32zero offset=0", loadType: wazeroir.V128LoadType32zero, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -274,7 +407,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "32zero offset=3", loadType: wazeroir.LoadV128Type32zero, offset: 3, + name: "32zero offset=3", loadType: wazeroir.V128LoadType32zero, offset: 3, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 0xff, 8, 9, 0xff, @@ -287,7 +420,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "64zero offset=0", loadType: wazeroir.LoadV128Type64zero, offset: 0, + name: "64zero offset=0", loadType: wazeroir.V128LoadType64zero, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -300,7 +433,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "64zero offset=2", loadType: wazeroir.LoadV128Type64zero, offset: 2, + name: "64zero offset=2", loadType: wazeroir.V128LoadType64zero, offset: 2, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -313,7 +446,7 @@ func TestCompiler_compileV128Load(t *testing.T) { }, }, { - name: "8splat offset=0", loadType: 
wazeroir.LoadV128Type8Splat, offset: 0, + name: "8splat offset=0", loadType: wazeroir.V128LoadType8Splat, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -322,7 +455,7 @@ func TestCompiler_compileV128Load(t *testing.T) { exp: [16]byte{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, }, { - name: "8splat offset=1", loadType: wazeroir.LoadV128Type8Splat, offset: 1, + name: "8splat offset=1", loadType: wazeroir.V128LoadType8Splat, offset: 1, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -332,7 +465,7 @@ func TestCompiler_compileV128Load(t *testing.T) { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, }, { - name: "16splat offset=0", loadType: wazeroir.LoadV128Type16Splat, offset: 0, + name: "16splat offset=0", loadType: wazeroir.V128LoadType16Splat, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -341,7 +474,7 @@ func TestCompiler_compileV128Load(t *testing.T) { exp: [16]byte{1, 0xff, 1, 0xff, 1, 0xff, 1, 0xff, 1, 0xff, 1, 0xff, 1, 0xff, 1, 0xff}, }, { - name: "16splat offset=5", loadType: wazeroir.LoadV128Type16Splat, offset: 5, + name: "16splat offset=5", loadType: wazeroir.V128LoadType16Splat, offset: 5, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -350,7 +483,7 @@ func TestCompiler_compileV128Load(t *testing.T) { exp: [16]byte{6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7}, }, { - name: "32splat offset=0", loadType: wazeroir.LoadV128Type32Splat, offset: 0, + name: "32splat offset=0", loadType: wazeroir.V128LoadType32Splat, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -359,7 +492,7 @@ func TestCompiler_compileV128Load(t *testing.T) { exp: [16]byte{1, 0xff, 3, 0xff, 1, 0xff, 3, 0xff, 1, 0xff, 3, 0xff, 1, 0xff, 3, 0xff}, }, { - name: "32splat offset=1", loadType: wazeroir.LoadV128Type32Splat, offset: 1, + name: "32splat offset=1", loadType: wazeroir.V128LoadType32Splat, offset: 1, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -368,7 +501,7 @@ func TestCompiler_compileV128Load(t *testing.T) { exp: [16]byte{0xff, 3, 0xff, 5, 0xff, 3, 0xff, 5, 0xff, 3, 0xff, 5, 0xff, 3, 0xff, 5}, }, { - name: "64splat offset=0", loadType: wazeroir.LoadV128Type64Splat, offset: 0, + name: "64splat offset=0", loadType: wazeroir.V128LoadType64Splat, offset: 0, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -377,7 +510,7 @@ func TestCompiler_compileV128Load(t *testing.T) { exp: [16]byte{1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 1, 0xff, 3, 0xff, 5, 6, 7, 0xff}, }, { - name: "64splat offset=1", loadType: wazeroir.LoadV128Type64Splat, offset: 1, + name: "64splat offset=1", loadType: wazeroir.V128LoadType64Splat, offset: 1, memSetupFn: func(buf []byte) { copy(buf, []byte{ 1, 0xff, 3, 0xff, 5, 6, 7, 0xff, 9, 0xff, @@ -1557,6 +1690,18 @@ func i8ToU8(v int8) byte { return byte(v) } +func i16ToU16(v int16) uint16 { + return uint16(v) +} + +func i32ToU32(v int32) uint32 { + return uint32(v) +} + +func i64ToU64(v int64) uint64 { + return uint64(v) +} + func TestCompiler_compileV128Swizzle(t *testing.T) { tests := []struct { @@ -3274,3 +3419,4011 @@ func TestCompiler_compileV128Cmp(t *testing.T) { }) } } + +func TestCompiler_compileV128AvgrU(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on arm64.
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + x1, x2, exp [16]byte + }{ + { + name: "i8x16", + shape: wazeroir.ShapeI8x16, + x1: [16]byte{0: 1, 2: 10, 10: 10, 15: math.MaxUint8}, + x2: [16]byte{0: 10, 4: 5, 10: 5, 15: 10}, + exp: [16]byte{ + 0: byte((uint16(1) + uint16(10) + 1) / 2), + 2: byte((uint16(10) + 1) / 2), + 4: byte((uint16(5) + 1) / 2), + 10: byte((uint16(10) + uint16(5) + 1) / 2), + 15: byte((uint16(math.MaxUint8) + uint16(10) + 1) / 2), + }, + }, + { + name: "i16x8", + shape: wazeroir.ShapeI16x8, + x1: i16x8(1, 0, 100, 0, 0, math.MaxUint16, 0, 0), + x2: i16x8(10, 0, math.MaxUint16, 0, 0, 1, 0, 0), + exp: i16x8( + uint16((uint32(1)+uint32(10)+1)/2), + 0, + uint16((uint32(100)+uint32(math.MaxUint16)+1)/2), + 0, + 0, + uint16((uint32(1)+uint32(math.MaxUint16)+1)/2), + 0, 0, + ), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128AvgrU(&wazeroir.OperationV128AvgrU{Shape: tc.shape}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Sqrt(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on arm64.
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + v, exp [16]byte + }{ + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + v: f32x4(1.23, -123.1231, math.MaxFloat32, float32(math.Inf(1))), + exp: f32x4( + float32(math.Sqrt(float64(float32(1.23)))), + float32(math.Sqrt(float64(float32(-123.1231)))), + float32(math.Sqrt(float64(float32(math.MaxFloat32)))), + float32(math.Sqrt(float64(float32(math.Inf(1))))), + ), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + v: f64x2(1.2314, math.MaxFloat64), + exp: f64x2(math.Sqrt(1.2314), math.Sqrt(math.MaxFloat64)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.v[:8]), + Hi: binary.LittleEndian.Uint64(tc.v[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Sqrt(&wazeroir.OperationV128Sqrt{Shape: tc.shape}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Mul(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on arm64.
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + x1, x2, exp [16]byte + }{ + { + name: "i16x8", + shape: wazeroir.ShapeI16x8, + x1: i16x8(1123, 0, 123, 1, 1, 5, 8, 1), + x2: i16x8(0, 123, 123, 0, 1, 5, 9, 1), + exp: i16x8(0, 0, 123*123, 0, 1, 25, 8*9, 1), + }, + { + name: "i32x4", + shape: wazeroir.ShapeI32x4, + x1: i32x4(i32ToU32(-123), 5, 4, math.MaxUint32), + x2: i32x4(i32ToU32(-10), 1, i32ToU32(-104), 0), + exp: i32x4(1230, 5, i32ToU32(-416), 0), + }, + { + name: "i64x2", + shape: wazeroir.ShapeI64x2, + x1: i64x2(1, 12345), + x2: i64x2(100, i64ToU64(-10)), + exp: i64x2(100, i64ToU64(-123450)), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(1.0, 123, float32(math.Inf(1)), float32(math.Inf(-1))), + x2: f32x4(51234.12341, 123, math.MaxFloat32, -123), + exp: f32x4(51234.12341, 123*123, float32(math.Inf(1)), float32(math.Inf(1))), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(1.123, math.Inf(1)), + x2: f64x2(1.123, math.MinInt64), + exp: f64x2(1.123*1.123, math.Inf(-1)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Mul(&wazeroir.OperationV128Mul{Shape: tc.shape}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Neg(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on arm64.
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + v, exp [16]byte + }{ + { + name: "i8x16", + shape: wazeroir.ShapeI8x16, + v: [16]byte{1: 123, 5: i8ToU8(-1), 15: i8ToU8(-125)}, + exp: [16]byte{1: i8ToU8(-123), 5: 1, 15: 125}, + }, + { + name: "i16x8", + shape: wazeroir.ShapeI16x8, + v: i16x8(0, 0, i16ToU16(-123), 0, 1, 25, 8, i16ToU16(-1)), + exp: i16x8(0, 0, 123, 0, i16ToU16(-1), i16ToU16(-25), i16ToU16(-8), 1), + }, + { + name: "i32x4", + shape: wazeroir.ShapeI32x4, + v: i32x4(1230, 5, i32ToU32(-416), 0), + exp: i32x4(i32ToU32(-1230), i32ToU32(-5), 416, 0), + }, + { + name: "i64x2", + shape: wazeroir.ShapeI64x2, + v: i64x2(100, i64ToU64(-123450)), + exp: i64x2(i64ToU64(-100), 123450), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + v: f32x4(51234.12341, -123, float32(math.Inf(1)), 0.1), + exp: f32x4(-51234.12341, 123, float32(math.Inf(-1)), -0.1), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + v: f32x4(51234.12341, 0, float32(math.Inf(1)), 0.1), + exp: f32x4(-51234.12341, float32(math.Copysign(0, -1)), float32(math.Inf(-1)), -0.1), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + v: f64x2(1.123, math.Inf(-1)), + exp: f64x2(-1.123, math.Inf(1)), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + v: f64x2(0, math.Inf(-1)), + exp: f64x2(math.Copysign(0, -1), math.Inf(1)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.v[:8]), + Hi: binary.LittleEndian.Uint64(tc.v[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Neg(&wazeroir.OperationV128Neg{Shape: tc.shape}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Abs(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on arm64.
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + v, exp [16]byte + }{ + { + name: "i8x16", + shape: wazeroir.ShapeI8x16, + v: [16]byte{1: 123, 5: i8ToU8(-1), 15: i8ToU8(-125)}, + exp: [16]byte{1: 123, 5: 1, 15: 125}, + }, + { + name: "i16x8", + shape: wazeroir.ShapeI16x8, + v: i16x8(0, 0, i16ToU16(-123), 0, 1, 25, 8, i16ToU16(-1)), + exp: i16x8(0, 0, 123, 0, 1, 25, 8, 1), + }, + { + name: "i32x4", + shape: wazeroir.ShapeI32x4, + v: i32x4(i32ToU32(-1230), 5, i32ToU32(-416), 0), + exp: i32x4(1230, 5, 416, 0), + }, + { + name: "i64x2", + shape: wazeroir.ShapeI64x2, + v: i64x2(i64ToU64(-100), i64ToU64(-123450)), + exp: i64x2(100, 123450), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + v: f32x4(51234.12341, -123, float32(math.Inf(1)), 0.1), + exp: f32x4(51234.12341, 123, float32(math.Inf(1)), 0.1), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + v: f32x4(51234.12341, 0, float32(math.Inf(1)), -0.1), + exp: f32x4(51234.12341, 0, float32(math.Inf(1)), 0.1), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + v: f64x2(-1.123, math.Inf(-1)), + exp: f64x2(1.123, math.Inf(1)), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + v: f64x2(0, math.Inf(-1)), + exp: f64x2(0, math.Inf(1)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.v[:8]), + Hi: binary.LittleEndian.Uint64(tc.v[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Abs(&wazeroir.OperationV128Abs{Shape: tc.shape}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Div(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
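+		// Note: v128 division exists only for the float shapes f32x4 and f64x2; wasm SIMD defines no integer division.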
+		t.Skip()
+	}
+
+	tests := []struct {
+		name        string
+		shape       wazeroir.Shape
+		x1, x2, exp [16]byte
+	}{
+		{
+			name:  "f32x4",
+			shape: wazeroir.ShapeF32x4,
+			x1:    f32x4(1.0, 123, float32(math.Inf(1)), float32(math.Inf(-1))),
+			x2:    f32x4(123.12, 123, math.MaxFloat32, -123),
+			exp:   f32x4(float32(1.0)/float32(123.12), 1, float32(math.Inf(1)), float32(math.Inf(1))),
+		},
+		{
+			name:  "f64x2",
+			shape: wazeroir.ShapeF64x2,
+			x1:    f64x2(1.123, math.Inf(1)),
+			x2:    f64x2(1.123, math.MinInt64),
+			exp:   f64x2(1.0, math.Inf(-1)),
+		},
+		{
+			name:  "f64x2",
+			shape: wazeroir.ShapeF64x2,
+			x1:    f64x2(0, math.Inf(1)),
+			x2:    f64x2(1.123, math.MaxInt64),
+			exp:   f64x2(0, math.Inf(1)),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x1[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x1[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x2[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x2[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Div(&wazeroir.OperationV128Div{Shape: tc.shape})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
+
+func TestCompiler_compileV128Min(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
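+		// Note: unlike x86 MINPS/MINPD, wasm's float min propagates NaN (a NaN in either lane yields NaN), hence the NaN-aware assertions below.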
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed bool + x1, x2, exp [16]byte + }{ + { + name: "i8x16s", + shape: wazeroir.ShapeI8x16, + signed: true, + x1: [16]byte{0: 123, 5: i8ToU8(-1), 15: 2}, + x2: [16]byte{0: 1, 5: 0, 15: i8ToU8(-1)}, + exp: [16]byte{0: 1, 5: i8ToU8(-1), 15: i8ToU8(-1)}, + }, + { + name: "i8x16u", + shape: wazeroir.ShapeI8x16, + signed: false, + x1: [16]byte{0: 123, 5: i8ToU8(-1), 15: 2}, + x2: [16]byte{0: 1, 5: 0, 15: i8ToU8(-1)}, + exp: [16]byte{0: 1, 5: 0, 15: 2}, + }, + { + name: "i16x8s", + shape: wazeroir.ShapeI16x8, + signed: true, + x1: i16x8(1123, 0, 123, 1, 1, 6, i16ToU16(-123), 1), + x2: i16x8(0, 123, i16ToU16(-123), 3, 1, 4, 5, 1), + exp: i16x8(0, 0, i16ToU16(-123), 1, 1, 4, i16ToU16(-123), 1), + }, + { + name: "i16x8u", + shape: wazeroir.ShapeI16x8, + signed: false, + x1: i16x8(1123, 0, 123, 1, 1, 6, i16ToU16(-123), 1), + x2: i16x8(0, 123, i16ToU16(-123), 3, 1, 4, 5, 1), + exp: i16x8(0, 0, 123, 1, 1, 4, 5, 1), + }, + { + name: "i32x4s", + shape: wazeroir.ShapeI32x4, + signed: true, + x1: i32x4(i32ToU32(-123), 0, 1, i32ToU32(math.MinInt32)), + x2: i32x4(123, 5, 1, 0), + exp: i32x4(i32ToU32(-123), 0, 1, i32ToU32(math.MinInt32)), + }, + { + name: "i32x4u", + shape: wazeroir.ShapeI32x4, + signed: false, + x1: i32x4(i32ToU32(-123), 0, 1, i32ToU32(math.MinInt32)), + x2: i32x4(123, 5, 1, 0), + exp: i32x4(123, 0, 1, 0), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(float32(math.NaN()), -123.12, 2.3, float32(math.Inf(1))), + x2: f32x4(5.5, 123.12, 5.0, float32(math.Inf(-1))), + exp: f32x4(float32(math.NaN()), -123.12, 2.3, float32(math.Inf(-1))), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(5.5, 123.12, -5.0, float32(math.Inf(-1))), + x2: f32x4(-123.12, float32(math.NaN()), 2.3, float32(math.Inf(-1))), + exp: f32x4(-123.12, float32(math.NaN()), -5.0, float32(math.Inf(-1))), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + x2: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + x2: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.MinInt64, 0), + x2: f64x2(math.MaxInt64, -12.3), + exp: f64x2(math.MinInt64, -12.3), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.MaxInt64, -12.3), + x2: f64x2(math.MinInt64, 0), + exp: f64x2(math.MinInt64, -12.3), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.NaN(), math.NaN()), + x2: f64x2(math.Inf(1), math.Inf(-1)), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.Inf(1), math.Inf(-1)), + x2: f64x2(math.NaN(), math.NaN()), + exp: f64x2(math.NaN(), math.NaN()), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := 
compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Min(&wazeroir.OperationV128Min{Shape: tc.shape, Signed: tc.signed}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + switch tc.shape { + case wazeroir.ShapeF64x2: + for _, vs := range [][2]float64{ + {math.Float64frombits(lo), math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[:8]))}, + {math.Float64frombits(hi), math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[8:]))}, + } { + actual, exp := vs[0], vs[1] + if math.IsNaN(exp) { + require.True(t, math.IsNaN(actual)) + } else { + require.Equal(t, exp, actual) + } + } + case wazeroir.ShapeF32x4: + for _, vs := range [][2]float32{ + {math.Float32frombits(uint32(lo)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[:4]))}, + {math.Float32frombits(uint32(lo >> 32)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[4:8]))}, + {math.Float32frombits(uint32(hi)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[8:12]))}, + {math.Float32frombits(uint32(hi >> 32)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[12:]))}, + } { + actual, exp := vs[0], vs[1] + if math.IsNaN(float64(exp)) { + require.True(t, math.IsNaN(float64(actual))) + } else { + require.Equal(t, exp, actual) + } + } + default: + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + } + }) + } +} + +func TestCompiler_compileV128Max(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
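+		// Note: as with min, wasm's float max is NaN-propagating, so a NaN in either input lane must yield a NaN output lane.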
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed bool + x1, x2, exp [16]byte + }{ + { + name: "i8x16s", + shape: wazeroir.ShapeI8x16, + signed: true, + x1: [16]byte{0: 123, 5: i8ToU8(-1), 15: 2}, + x2: [16]byte{0: 1, 5: 0, 15: i8ToU8(-1)}, + exp: [16]byte{0: 123, 5: 0, 15: 2}, + }, + { + name: "i8x16u", + shape: wazeroir.ShapeI8x16, + signed: false, + x1: [16]byte{0: 123, 5: i8ToU8(-1), 15: 2}, + x2: [16]byte{0: 1, 5: 0, 15: i8ToU8(-1)}, + exp: [16]byte{0: 123, 5: i8ToU8(-1), 15: i8ToU8(-1)}, + }, + { + name: "i16x8s", + shape: wazeroir.ShapeI16x8, + signed: true, + x1: i16x8(1123, 0, 123, 1, 1, 6, i16ToU16(-123), 1), + x2: i16x8(0, 123, i16ToU16(-123), 3, 1, 4, 5, 1), + exp: i16x8(1123, 123, 123, 3, 1, 6, 5, 1), + }, + { + name: "i16x8u", + shape: wazeroir.ShapeI16x8, + signed: false, + x1: i16x8(1123, 0, 123, 1, 1, 6, i16ToU16(-123), 1), + x2: i16x8(0, 123, i16ToU16(-123), 3, 1, 4, 5, 1), + exp: i16x8(1123, 123, i16ToU16(-123), 3, 1, 6, i16ToU16(-123), 1), + }, + { + name: "i32x4s", + shape: wazeroir.ShapeI32x4, + signed: true, + x1: i32x4(i32ToU32(-123), 0, 1, i32ToU32(math.MinInt32)), + x2: i32x4(123, 5, 1, 0), + exp: i32x4(123, 5, 1, 0), + }, + { + name: "i32x4u", + shape: wazeroir.ShapeI32x4, + signed: false, + x1: i32x4(i32ToU32(-123), 0, 1, i32ToU32(math.MinInt32)), + x2: i32x4(123, 5, 1, 0), + exp: i32x4(i32ToU32(-123), 5, 1, i32ToU32(math.MinInt32)), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(float32(math.NaN()), -123.12, 2.3, float32(math.Inf(1))), + x2: f32x4(5.5, 123.12, 5.0, float32(math.Inf(-1))), + exp: f32x4(float32(math.NaN()), 123.12, 5.0, float32(math.Inf(1))), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(5.5, 123.12, -5.0, float32(math.Inf(-1))), + x2: f32x4(-123.12, float32(math.NaN()), 2.3, float32(math.Inf(-1))), + exp: f32x4(5.5, float32(math.NaN()), 2.3, float32(math.Inf(-1))), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + x2: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f32x4", + shape: wazeroir.ShapeF32x4, + x1: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + x2: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.MinInt64, 0), + x2: f64x2(math.MaxInt64, -12.3), + exp: f64x2(math.MaxInt64, 0), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.MaxInt64, -12.3), + x2: f64x2(math.MinInt64, 0), + exp: f64x2(math.MaxInt64, 0), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.NaN(), -12.3), + x2: f64x2(math.MinInt64, math.NaN()), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.MinInt64, math.NaN()), + x2: f64x2(math.NaN(), -12.3), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.NaN(), math.NaN()), + x2: f64x2(math.Inf(1), math.Inf(-1)), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64x2", + shape: wazeroir.ShapeF64x2, + x1: f64x2(math.Inf(1), math.Inf(-1)), + x2: 
f64x2(math.NaN(), math.NaN()), + exp: f64x2(math.NaN(), math.NaN()), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Max(&wazeroir.OperationV128Max{Shape: tc.shape, Signed: tc.signed}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + switch tc.shape { + case wazeroir.ShapeF64x2: + for _, vs := range [][2]float64{ + {math.Float64frombits(lo), math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[:8]))}, + {math.Float64frombits(hi), math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[8:]))}, + } { + actual, exp := vs[0], vs[1] + if math.IsNaN(exp) { + require.True(t, math.IsNaN(actual)) + } else { + require.Equal(t, exp, actual) + } + } + case wazeroir.ShapeF32x4: + for _, vs := range [][2]float32{ + {math.Float32frombits(uint32(lo)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[:4]))}, + {math.Float32frombits(uint32(lo >> 32)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[4:8]))}, + {math.Float32frombits(uint32(hi)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[8:12]))}, + {math.Float32frombits(uint32(hi >> 32)), math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[12:]))}, + } { + actual, exp := vs[0], vs[1] + if math.IsNaN(float64(exp)) { + require.True(t, math.IsNaN(float64(actual))) + } else { + require.Equal(t, exp, actual) + } + } + default: + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + } + }) + } +} + +func TestCompiler_compileV128AddSat(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
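+		// Note: add_sat clamps instead of wrapping: signed i8 127+1 stays 127, and unsigned u8 255+1 stays 255.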
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed bool + x1, x2, exp [16]byte + }{ + { + name: "i8x16s", + shape: wazeroir.ShapeI8x16, + signed: true, + x1: [16]byte{ + 0: i8ToU8(math.MaxInt8), + 5: i8ToU8(-1), + 15: i8ToU8(math.MinInt8), + }, + x2: [16]byte{ + 0: 1, + 5: 0, + 15: i8ToU8(-1), + }, + exp: [16]byte{ + 0: i8ToU8(math.MaxInt8), + 5: i8ToU8(-1), + 15: i8ToU8(math.MinInt8), + }, + }, + { + name: "i8x16u", + shape: wazeroir.ShapeI8x16, + signed: false, + x1: [16]byte{ + 0: i8ToU8(math.MaxInt8), + 5: 0, + 15: math.MaxUint8, + }, + x2: [16]byte{ + 0: 1, + 5: i8ToU8(-1), + 15: 1, + }, + exp: [16]byte{ + 0: i8ToU8(math.MaxInt8) + 1, + 5: i8ToU8(-1), + 15: math.MaxUint8, + }, + }, + { + name: "i16x8s", + shape: wazeroir.ShapeI16x8, + signed: true, + x1: i16x8(i16ToU16(math.MinInt16), 0, 123, 1, 1, 6, i16ToU16(-123), i16ToU16(math.MaxInt16)), + x2: i16x8(i16ToU16(-1), 123, i16ToU16(-123), 3, 1, 4, 5, 1), + exp: i16x8(i16ToU16(math.MinInt16), 123, 0, 4, 2, 10, i16ToU16(-118), i16ToU16(math.MaxInt16)), + }, + { + name: "i16x8u", + shape: wazeroir.ShapeI16x8, + signed: false, + x1: i16x8(1123, 0, 123, 1, 1, 6, i16ToU16(-123), math.MaxUint16), + x2: i16x8(0, 123, math.MaxUint16, 3, 1, 4, 0, 1), + exp: i16x8(1123, 123, math.MaxUint16, 4, 2, 10, i16ToU16(-123), math.MaxUint16), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128AddSat(&wazeroir.OperationV128AddSat{Shape: tc.shape, Signed: tc.signed}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128SubSat(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
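+		// Note: sub_sat clamps as well: signed i8 -128-1 stays -128, and unsigned lanes floor at 0 (e.g. 0-1 = 0).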
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed bool + x1, x2, exp [16]byte + }{ + { + name: "i8x16s", + shape: wazeroir.ShapeI8x16, + signed: true, + x1: [16]byte{ + 0: i8ToU8(math.MinInt8), + 5: i8ToU8(-1), + 15: i8ToU8(math.MaxInt8), + }, + x2: [16]byte{ + 0: 1, + 5: 0, + 15: i8ToU8(-1), + }, + exp: [16]byte{ + 0: i8ToU8(math.MinInt8), + 5: i8ToU8(-1), + 15: i8ToU8(math.MaxInt8), + }, + }, + { + name: "i8x16u", + shape: wazeroir.ShapeI8x16, + signed: false, + x1: [16]byte{ + 0: i8ToU8(math.MinInt8), + 5: i8ToU8(-1), + 15: 0, + }, + x2: [16]byte{ + 0: 1, + 5: 0, + 15: 1, + }, + exp: [16]byte{ + 0: i8ToU8(math.MinInt8) - 1, + 5: i8ToU8(-1), + 15: 0, + }, + }, + { + name: "i16x8s", + shape: wazeroir.ShapeI16x8, + signed: true, + x1: i16x8(i16ToU16(math.MinInt16), 0, 123, 1, 1, 6, i16ToU16(-123), i16ToU16(math.MaxInt16)), + x2: i16x8(1, 123, i16ToU16(-123), 3, 1, 4, 5, i16ToU16(-123)), + exp: i16x8(i16ToU16(math.MinInt16), i16ToU16(-123), 246, i16ToU16(-2), 0, 2, i16ToU16(-128), i16ToU16(math.MaxInt16)), + }, + { + name: "i16x8u", + shape: wazeroir.ShapeI16x8, + signed: false, + x1: i16x8(1123, 0, 123, 1, 1, 6, 200, math.MaxUint16), + x2: i16x8(0, 123, math.MaxUint16, 3, 1, 4, i16ToU16(-1), 12), + exp: i16x8(1123, 0, 0, 0, 0, 2, 0, math.MaxUint16-12), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128SubSat(&wazeroir.OperationV128SubSat{Shape: tc.shape, Signed: tc.signed}) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Popcnt(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
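+		// Note: i8x16.popcnt replaces each byte with the count of its set bits, e.g. 0b11111111 -> 8.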
+		t.Skip()
+	}
+
+	tests := []struct {
+		name   string
+		v, exp [16]byte
+	}{
+		{
+			name: "ones",
+			v: [16]byte{
+				1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7,
+				0, 1 << 2, 0, 1 << 4, 0, 1 << 6, 0, 0,
+			},
+			exp: [16]byte{
+				1, 1, 1, 1, 1, 1, 1, 1,
+				0, 1, 0, 1, 0, 1, 0, 0,
+			},
+		},
+		{
+			name: "mix",
+			v: [16]byte{
+				0b1, 0b11, 0b111, 0b1111, 0b11111, 0b111111, 0b1111111, 0b11111111,
+				0b10000001, 0b10000010, 0b10000100, 0b10001000, 0b10010000, 0b10100000, 0b11000000, 0,
+			},
+			exp: [16]byte{
+				1, 2, 3, 4, 5, 6, 7, 8,
+				2, 2, 2, 2, 2, 2, 2, 0,
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Popcnt(&wazeroir.OperationV128Popcnt{})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
+
+func TestCompiler_compileV128Round(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
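+		// Note: wasm's "nearest" rounds ties to even, unlike Go's math.Round, which is why the expectations below use moremath.WasmCompatNearestF32/F64.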
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + kind wazeroir.OperationKind + v [16]byte + }{ + { + name: "f32 ceil", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Ceil, + v: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + }, + { + name: "f32 ceil", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Ceil, + v: f32x4(math.Pi, -1231231.123, float32(math.NaN()), float32(math.Inf(-1))), + }, + { + name: "f64 ceil", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Ceil, + v: f64x2(1.231, -123.12313), + }, + { + name: "f64 ceil", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Ceil, + v: f64x2(math.Inf(1), math.NaN()), + }, + { + name: "f64 ceil", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Ceil, + v: f64x2(math.Inf(-1), math.Pi), + }, + { + name: "f32 floor", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Floor, + v: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + }, + { + name: "f32 floor", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Floor, + v: f32x4(math.Pi, -1231231.123, float32(math.NaN()), float32(math.Inf(-1))), + }, + { + name: "f64 floor", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Floor, + v: f64x2(1.231, -123.12313), + }, + { + name: "f64 floor", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Floor, + v: f64x2(math.Inf(1), math.NaN()), + }, + { + name: "f64 floor", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Floor, + v: f64x2(math.Inf(-1), math.Pi), + }, + { + name: "f32 trunc", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Trunc, + v: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + }, + { + name: "f32 trunc", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Trunc, + v: f32x4(math.Pi, -1231231.123, float32(math.NaN()), float32(math.Inf(-1))), + }, + { + name: "f64 trunc", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Trunc, + v: f64x2(1.231, -123.12313), + }, + { + name: "f64 trunc", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Trunc, + v: f64x2(math.Inf(1), math.NaN()), + }, + { + name: "f64 trunc", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Trunc, + v: f64x2(math.Inf(-1), math.Pi), + }, + { + name: "f32 nearest", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Nearest, + v: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + }, + { + name: "f32 nearest", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Nearest, + v: f32x4(math.Pi, -1231231.123, float32(math.NaN()), float32(math.Inf(-1))), + }, + { + name: "f64 nearest", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Nearest, + v: f64x2(1.231, -123.12313), + }, + { + name: "f64 nearest", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Nearest, + v: f64x2(math.Inf(1), math.NaN()), + }, + { + name: "f64 nearest", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Nearest, + v: f64x2(math.Inf(-1), math.Pi), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.v[:8]), + Hi: binary.LittleEndian.Uint64(tc.v[8:]), + 
}) + require.NoError(t, err) + + is32bit := tc.shape == wazeroir.ShapeF32x4 + switch tc.kind { + case wazeroir.OperationKindV128Ceil: + err = compiler.compileV128Ceil(&wazeroir.OperationV128Ceil{Shape: tc.shape}) + case wazeroir.OperationKindV128Floor: + err = compiler.compileV128Floor(&wazeroir.OperationV128Floor{Shape: tc.shape}) + case wazeroir.OperationKindV128Trunc: + err = compiler.compileV128Trunc(&wazeroir.OperationV128Trunc{Shape: tc.shape}) + case wazeroir.OperationKindV128Nearest: + err = compiler.compileV128Nearest(&wazeroir.OperationV128Nearest{Shape: tc.shape}) + } + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + + if is32bit { + actualFs := [4]float32{ + math.Float32frombits(uint32(lo)), + math.Float32frombits(uint32(lo >> 32)), + math.Float32frombits(uint32(hi)), + math.Float32frombits(uint32(hi >> 32))} + f1Original, f2Original, f3Original, f4Original := + math.Float32frombits(binary.LittleEndian.Uint32(tc.v[:4])), + math.Float32frombits(binary.LittleEndian.Uint32(tc.v[4:8])), + math.Float32frombits(binary.LittleEndian.Uint32(tc.v[8:12])), + math.Float32frombits(binary.LittleEndian.Uint32(tc.v[12:])) + + var expFs [4]float32 + switch tc.kind { + case wazeroir.OperationKindV128Ceil: + expFs[0] = float32(math.Ceil(float64(f1Original))) + expFs[1] = float32(math.Ceil(float64(f2Original))) + expFs[2] = float32(math.Ceil(float64(f3Original))) + expFs[3] = float32(math.Ceil(float64(f4Original))) + case wazeroir.OperationKindV128Floor: + expFs[0] = float32(math.Floor(float64(f1Original))) + expFs[1] = float32(math.Floor(float64(f2Original))) + expFs[2] = float32(math.Floor(float64(f3Original))) + expFs[3] = float32(math.Floor(float64(f4Original))) + case wazeroir.OperationKindV128Trunc: + expFs[0] = float32(math.Trunc(float64(f1Original))) + expFs[1] = float32(math.Trunc(float64(f2Original))) + expFs[2] = float32(math.Trunc(float64(f3Original))) + expFs[3] = float32(math.Trunc(float64(f4Original))) + case wazeroir.OperationKindV128Nearest: + expFs[0] = moremath.WasmCompatNearestF32(f1Original) + expFs[1] = moremath.WasmCompatNearestF32(f2Original) + expFs[2] = moremath.WasmCompatNearestF32(f3Original) + expFs[3] = moremath.WasmCompatNearestF32(f4Original) + } + + for i := range expFs { + exp, actual := expFs[i], actualFs[i] + if math.IsNaN(float64(exp)) { + require.True(t, math.IsNaN(float64(actual))) + } else { + require.Equal(t, exp, actual) + } + } + } else { + actualFs := [2]float64{math.Float64frombits(lo), math.Float64frombits(hi)} + f1Original, f2Original := + math.Float64frombits(binary.LittleEndian.Uint64(tc.v[:8])), math.Float64frombits(binary.LittleEndian.Uint64(tc.v[8:])) + + var expFs [2]float64 + switch tc.kind { + case wazeroir.OperationKindV128Ceil: + expFs[0] = math.Ceil(f1Original) + expFs[1] = math.Ceil(f2Original) + case wazeroir.OperationKindV128Floor: + expFs[0] = math.Floor(f1Original) + expFs[1] = math.Floor(f2Original) + case wazeroir.OperationKindV128Trunc: + expFs[0] = math.Trunc(f1Original) + expFs[1] = math.Trunc(f2Original) + case wazeroir.OperationKindV128Nearest: + expFs[0] = 
moremath.WasmCompatNearestF64(f1Original) + expFs[1] = moremath.WasmCompatNearestF64(f2Original) + } + + for i := range expFs { + exp, actual := expFs[i], actualFs[i] + if math.IsNaN(exp) { + require.True(t, math.IsNaN(actual)) + } else { + require.Equal(t, exp, actual) + } + } + } + }) + } +} + +func TestCompiler_compileV128_Pmax_Pmin(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. + t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + kind wazeroir.OperationKind + x1, x2, exp [16]byte + }{ + { + name: "f32 pmin", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmin, + x1: f32x4(float32(math.Inf(1)), -1.5, 1123.5, float32(math.Inf(1))), + x2: f32x4(1.4, float32(math.Inf(-1)), -1231.5, float32(math.Inf(1))), + exp: f32x4(1.4, float32(math.Inf(-1)), -1231.5, float32(math.Inf(1))), + }, + { + name: "f32 pmin", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmin, + x1: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + x2: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f32 pmin", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmin, + x1: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + x2: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + exp: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + }, + { + name: "f32 pmin", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmin, + x1: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + x2: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + exp: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + }, + { + name: "f32 pmin", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmin, + x1: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + x2: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f64 pmin", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmin, + x1: f64x2(math.Inf(1), -123123.1231), + x2: f64x2(-123123.1, math.Inf(-1)), + exp: f64x2(-123123.1, math.Inf(-1)), + }, + { + name: "f64 pmin", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmin, + x1: f64x2(math.NaN(), math.NaN()), + x2: f64x2(-123123.1, 1.0), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64 pmin", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmin, + x1: f64x2(-123123.1, 1.0), + x2: f64x2(math.NaN(), math.NaN()), + exp: f64x2(-123123.1, 1.0), + }, + { + name: "f64 pmin", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmin, + x1: f64x2(math.NaN(), math.NaN()), + x2: f64x2(math.Inf(1), math.Inf(-1)), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64 pmin", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmin, + x1: f64x2(math.Inf(1), math.Inf(-1)), + x2: f64x2(math.NaN(), math.NaN()), + exp: f64x2(math.Inf(1), math.Inf(-1)), + }, + { + name: "f32 pmax", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmax, + x1: f32x4(float32(math.Inf(1)), -1.5, 1123.5, float32(math.Inf(1))), + x2: f32x4(1.4, 
float32(math.Inf(-1)), -1231.5, float32(math.Inf(1))), + exp: f32x4(float32(math.Inf(1)), -1.5, 1123.5, float32(math.Inf(1))), + }, + { + name: "f32 pmax", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmax, + x1: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + x2: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f32 pmax", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmax, + x1: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + x2: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + exp: f32x4(1.4, -1.5, 1.5, float32(math.Inf(1))), + }, + { + name: "f32 pmax", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmax, + x1: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + x2: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + exp: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + }, + { + name: "f32 pmax", + shape: wazeroir.ShapeF32x4, + kind: wazeroir.OperationKindV128Pmax, + x1: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + x2: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(math.Inf(-1)), float32(math.Inf(1))), + exp: f32x4(float32(math.NaN()), float32(math.NaN()), float32(math.NaN()), float32(math.NaN())), + }, + { + name: "f64 pmax", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmax, + x1: f64x2(math.Inf(1), -123123.1231), + x2: f64x2(-123123.1, math.Inf(-1)), + exp: f64x2(math.Inf(1), -123123.1231), + }, + { + name: "f64 pmax", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmax, + x1: f64x2(math.NaN(), math.NaN()), + x2: f64x2(-123123.1, 1.0), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64 pmax", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmax, + x1: f64x2(-123123.1, 1.0), + x2: f64x2(math.NaN(), math.NaN()), + exp: f64x2(-123123.1, 1.0), + }, + { + name: "f64 pmax", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmax, + x1: f64x2(math.NaN(), math.NaN()), + x2: f64x2(math.Inf(1), math.Inf(-1)), + exp: f64x2(math.NaN(), math.NaN()), + }, + { + name: "f64 pmax", + shape: wazeroir.ShapeF64x2, + kind: wazeroir.OperationKindV128Pmax, + x1: f64x2(math.Inf(1), math.Inf(-1)), + x2: f64x2(math.NaN(), math.NaN()), + exp: f64x2(math.Inf(1), math.Inf(-1)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + is32bit := tc.shape == wazeroir.ShapeF32x4 + switch tc.kind { + case wazeroir.OperationKindV128Pmin: + err = compiler.compileV128Pmin(&wazeroir.OperationV128Pmin{Shape: tc.shape}) + case wazeroir.OperationKindV128Pmax: + err = 
compiler.compileV128Pmax(&wazeroir.OperationV128Pmax{Shape: tc.shape}) + } + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + + if is32bit { + actualFs := [4]float32{ + math.Float32frombits(uint32(lo)), + math.Float32frombits(uint32(lo >> 32)), + math.Float32frombits(uint32(hi)), + math.Float32frombits(uint32(hi >> 32))} + expFs := [4]float32{ + math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[:4])), + math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[4:8])), + math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[8:12])), + math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[12:])), + } + for i := range expFs { + exp, actual := expFs[i], actualFs[i] + if math.IsNaN(float64(exp)) { + require.True(t, math.IsNaN(float64(actual))) + } else { + require.Equal(t, exp, actual) + } + } + } else { + actualFs := [2]float64{ + math.Float64frombits(lo), math.Float64frombits(hi), + } + expFs := [2]float64{ + math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[:8])), + math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[8:])), + } + for i := range expFs { + exp, actual := expFs[i], actualFs[i] + if math.IsNaN(exp) { + require.True(t, math.IsNaN(actual)) + } else { + require.Equal(t, exp, actual) + } + } + } + }) + } +} + +func TestCompiler_compileV128ExtMul(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
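+		// Note: extmul_{low,high} widens the chosen half of the input lanes and multiplies into lanes of twice the width, so the products cannot overflow.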
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed, useLow bool + x1, x2, exp [16]byte + }{ + { + name: "i8x16s low", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + x1: [16]byte{}, x2: [16]byte{}, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i8x16s low", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + x1: [16]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + x2: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8(128, 128, 128, 128, 128, 128, 128, 128), + }, + { + name: "i8x16s low", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + x1: [16]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + x2: [16]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + }, + { + name: "i8x16s low", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + x1: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + x2: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384), + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + x1: [16]byte{}, x2: [16]byte{}, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + x1: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, + }, + x2: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8(128, 128, 128, 128, 128, 128, 128, 128), + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + x1: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, + }, + x2: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, + }, + exp: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + x1: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + x2: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8(16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384), + }, + { + name: "i8x16u low", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + x1: [16]byte{}, x2: [16]byte{}, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i8x16u low", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + x1: [16]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + x2: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + 0, 0, 0, 0, + }, + exp: i16x8(32640, 32640, 32640, 32640, 32640, 32640, 32640, 
32640), + }, + { + name: "i8x16u low", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + x1: [16]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + x2: [16]byte{ + 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(i16ToU16(-511), i16ToU16(-511), i16ToU16(-511), i16ToU16(-511), + i16ToU16(-511), i16ToU16(-511), i16ToU16(-511), i16ToU16(-511)), + }, + { + name: "i8x16u low", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + x1: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + x2: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384), + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + x1: [16]byte{}, x2: [16]byte{}, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + x1: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, + }, + x2: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + 0, 0, 0, 0, + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8(32640, 32640, 32640, 32640, 32640, 32640, 32640, 32640), + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + x1: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, + }, + x2: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, + }, + exp: i16x8(i16ToU16(-511), i16ToU16(-511), i16ToU16(-511), i16ToU16(-511), + i16ToU16(-511), i16ToU16(-511), i16ToU16(-511), i16ToU16(-511)), + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + x1: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + x2: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8(16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384), + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + x1: i16x8( + 16383, 16383, 16383, 16383, + 0, 0, 1, 0, + ), + x2: i16x8( + 16384, 16384, 16384, 16384, + 0, 0, 1, 0, + ), + exp: i32x4(268419072, 268419072, 268419072, 268419072), + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + x1: i16x8( + i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), + 0, 0, 1, 0, + ), + x2: i16x8( + i16ToU16(-32767), 0, i16ToU16(-32767), 0, + 0, 0, 1, 0, + ), + exp: i32x4(1073709056, 0, 1073709056, 0), + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + x1: i16x8( + 65535, 65535, 65535, 65535, + 0, 0, 1, 0, + ), + x2: i16x8( + 65535, 0, 65535, 0, + 0, 0, 1, 0, + ), + exp: i32x4(1, 0, 1, 0), + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + x1: [16]byte{}, + x2: [16]byte{}, + 
exp: [16]byte{}, + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + x1: i16x8( + 0, 0, 1, 0, + 16383, 16383, 16383, 16383, + ), + x2: i16x8( + 0, 0, 1, 0, + 16384, 16384, 16384, 16384, + ), + exp: i32x4(268419072, 268419072, 268419072, 268419072), + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + x1: i16x8( + 0, 0, 1, 0, + i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), + ), + x2: i16x8( + 0, 0, 1, 0, + i16ToU16(-32767), 0, i16ToU16(-32767), 0, + ), + exp: i32x4(1073709056, 0, 1073709056, 0), + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + x1: i16x8( + 0, 0, 1, 0, + 65535, 65535, 65535, 65535, + ), + x2: i16x8( + 0, 0, 1, 0, + + 65535, 0, 65535, 0, + ), + exp: i32x4(1, 0, 1, 0), + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + x1: i16x8( + 16383, 16383, 16383, 16383, + 0, 0, 1, 0, + ), + x2: i16x8( + 16384, 16384, 16384, 16384, + 0, 0, 1, 0, + ), + exp: i32x4(268419072, 268419072, 268419072, 268419072), + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + x1: i16x8( + i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), + 0, 0, 1, 0, + ), + x2: i16x8( + i16ToU16(-32767), 0, i16ToU16(-32767), 0, + 0, 0, 1, 0, + ), + exp: i32x4(1073774592, 0, 1073774592, 0), + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + x1: i16x8( + 65535, 65535, 65535, 65535, + 0, 0, 1, 0, + ), + x2: i16x8( + 65535, 0, 65535, 0, + 0, 0, 1, 0, + ), + exp: i32x4(i32ToU32(-131071), 0, i32ToU32(-131071), 0), + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + x1: i16x8( + 0, 0, 1, 0, + 16383, 16383, 16383, 16383, + ), + x2: i16x8( + 0, 0, 1, 0, + 16384, 16384, 16384, 16384, + ), + exp: i32x4(268419072, 268419072, 268419072, 268419072), + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + x1: i16x8( + 0, 0, 1, 0, + i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), + ), + x2: i16x8( + 0, 0, 1, 0, + i16ToU16(-32767), 0, i16ToU16(-32767), 0, + ), + exp: i32x4(1073774592, 0, 1073774592, 0), + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + x1: i16x8( + 0, 0, 1, 0, + 65535, 65535, 65535, 65535, + ), + x2: i16x8( + 0, 0, 1, 0, + 65535, 0, 65535, 0, + ), + exp: i32x4(i32ToU32(-131071), 0, i32ToU32(-131071), 0), + }, + { + name: "i32x4s lo", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: true, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i32x4s lo", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: true, + x1: i32x4( + 1, i32ToU32(-1), + 0, 0, + ), + x2: i32x4( + i32ToU32(-1), 1, + 0, 0, + ), + exp: i64x2(i64ToU64(-1), i64ToU64(-1)), + }, + { + name: "i32x4s lo", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: true, + x1: i32x4( + 1073741824, 4294967295, + 0, 0, + ), + x2: i32x4( + 1073741824, 4294967295, + 0, 0, + ), + exp: i64x2(1152921504606846976, 1), + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + 
useLow: false, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: false, + x1: i32x4( + 0, 0, + 1, i32ToU32(-1), + ), + x2: i32x4( + 0, 0, + i32ToU32(-1), 1, + ), + exp: i64x2(i64ToU64(-1), i64ToU64(-1)), + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: false, + x1: i32x4( + 0, 0, + 1073741824, 4294967295, + ), + x2: i32x4( + 0, 0, + 1073741824, 4294967295, + ), + exp: i64x2(1152921504606846976, 1), + }, + { + name: "i32x4u lo", + shape: wazeroir.ShapeI32x4, + signed: false, + useLow: true, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i32x4u lo", + shape: wazeroir.ShapeI32x4, + signed: false, + useLow: true, + x1: i32x4( + 1, i32ToU32(-1), + 0, 0, + ), + x2: i32x4( + i32ToU32(-1), 1, + 0, 0, + ), + exp: i64x2(4294967295, 4294967295), + }, + { + name: "i32x4u lo", + shape: wazeroir.ShapeI32x4, + signed: false, + useLow: true, + x1: i32x4( + 1073741824, 4294967295, + 0, 0, + ), + x2: i32x4( + 1073741824, 4294967295, + 0, 0, + ), + exp: i64x2(1152921504606846976, i64ToU64(-8589934591)), + }, + { + name: "i32x4u hi", + shape: wazeroir.ShapeI32x4, + signed: false, + useLow: false, + x1: [16]byte{}, + x2: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i32x4u hi", + shape: wazeroir.ShapeI32x4, + signed: false, + useLow: false, + x1: i32x4( + 0, 0, + 1, i32ToU32(-1), + ), + x2: i32x4( + 0, 0, + i32ToU32(-1), 1, + ), + exp: i64x2(4294967295, 4294967295), + }, + { + name: "i32x4u hi", + shape: wazeroir.ShapeI32x4, + signed: false, + useLow: false, + x1: i32x4( + 0, 0, + 1073741824, 4294967295, + ), + x2: i32x4( + 0, 0, + 1073741824, 4294967295, + ), + exp: i64x2(1152921504606846976, i64ToU64(-8589934591)), + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + env := newCompilerEnvironment() + compiler := env.requireNewCompiler(t, newCompiler, + &wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}}) + + err := compiler.compilePreamble() + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x1[:8]), + Hi: binary.LittleEndian.Uint64(tc.x1[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128Const(&wazeroir.OperationV128Const{ + Lo: binary.LittleEndian.Uint64(tc.x2[:8]), + Hi: binary.LittleEndian.Uint64(tc.x2[8:]), + }) + require.NoError(t, err) + + err = compiler.compileV128ExtMul(&wazeroir.OperationV128ExtMul{ + OriginShape: tc.shape, Signed: tc.signed, UseLow: tc.useLow, + }) + require.NoError(t, err) + + require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) + require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) + + err = compiler.compileReturnFunction() + require.NoError(t, err) + + // Generate and run the code under test. + code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCompiler_compileV128Extend(t *testing.T) { + if runtime.GOARCH != "amd64" { + // TODO: implement on amd64. 
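+		// Note: extend_{low,high}_..._{s,u} takes half of the input lanes and sign- or zero-extends each one to twice its width.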
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed, useLow bool + v, exp [16]byte + }{ + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + v: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + }, + exp: i16x8(i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1)), + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + v: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + }, + exp: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + }, + { + name: "i8x16s hi", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: false, + v: [16]byte{ + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i8x16s lo", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i8x16s lo", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + v: [16]byte{ + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1)), + }, + { + name: "i8x16s lo", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + v: [16]byte{ + 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + }, + { + name: "i8x16s lo", + shape: wazeroir.ShapeI8x16, + signed: true, + useLow: true, + v: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + }, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + // unsigned + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + v: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + }, + exp: i16x8(255, 255, 255, 255, 255, 255, 255, 255), + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + v: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + }, + exp: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + }, + { + name: "i8x16u hi", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: false, + v: [16]byte{ + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i8x16u lo", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i8x16u lo", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + v: [16]byte{ + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + 0, 0, 0, 0, 0, 0, 0, 0, + }, + exp: i16x8(255, 255, 255, 255, 255, 255, 255, 255), + }, + { + name: "i8x16u lo", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + v: [16]byte{ + 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + }, + 
exp: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + }, + { + name: "i8x16u lo", + shape: wazeroir.ShapeI8x16, + signed: false, + useLow: true, + v: [16]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + }, + exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0), + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + v: i16x8(1, 1, 1, 1, 0, 0, 0, 0), + exp: i32x4(0, 0, 0, 0), + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + v: i16x8(0, 0, 0, 0, i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1)), + exp: i32x4(i32ToU32(-1), i32ToU32(-1), i32ToU32(-1), i32ToU32(-1)), + }, + { + name: "i16x8s hi", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: false, + v: i16x8(0, 0, 0, 0, 123, 0, 123, 0), + exp: i32x4(123, 0, 123, 0), + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + v: i16x8(0, 0, 0, 0, 1, 1, 1, 1), + exp: i32x4(0, 0, 0, 0), + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + v: i16x8(i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), 0, 0, 0, 0), + exp: i32x4(i32ToU32(-1), i32ToU32(-1), i32ToU32(-1), i32ToU32(-1)), + }, + { + name: "i16x8s lo", + shape: wazeroir.ShapeI16x8, + signed: true, + useLow: true, + v: i16x8(123, 0, 123, 0, 0, 0, 0, 0), + exp: i32x4(123, 0, 123, 0), + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + v: i16x8(1, 1, 1, 1, 0, 0, 0, 0), + exp: i32x4(0, 0, 0, 0), + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + v: i16x8(0, 0, 0, 0, i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1)), + exp: i32x4(65535, 65535, 65535, 65535), + }, + { + name: "i16x8u hi", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: false, + v: i16x8(0, 0, 0, 0, 123, 0, 123, 0), + exp: i32x4(123, 0, 123, 0), + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + v: i16x8(0, 0, 0, 0, 1, 1, 1, 1), + exp: i32x4(0, 0, 0, 0), + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + v: i16x8(i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), 0, 0, 0, 0), + exp: i32x4(65535, 65535, 65535, 65535), + }, + { + name: "i16x8u lo", + shape: wazeroir.ShapeI16x8, + signed: false, + useLow: true, + v: i16x8(123, 0, 123, 0, 0, 0, 0, 0), + exp: i32x4(123, 0, 123, 0), + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: false, + v: i32x4(0, 0, 1, i32ToU32(-1)), + exp: i64x2(1, i64ToU64(-1)), + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: false, + v: i32x4(1, i32ToU32(-1), 0, 0), + exp: i64x2(0, 0), + }, + { + name: "i32x4s hi", + shape: wazeroir.ShapeI32x4, + signed: true, + useLow: false, + v: 
i32x4(1, i32ToU32(-1), 123, 123),
+			exp: i64x2(123, 123),
+		},
+		{
+			name: "i32x4s lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			useLow: true,
+			v: [16]byte{},
+			exp: [16]byte{},
+		},
+		{
+			name: "i32x4s lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			useLow: true,
+			v: i32x4(1, i32ToU32(-1), 0, 0),
+			exp: i64x2(1, i64ToU64(-1)),
+		},
+		{
+			name: "i32x4s lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			useLow: true,
+			v: i32x4(0, 0, 1, i32ToU32(-1)),
+			exp: i64x2(0, 0),
+		},
+		{
+			name: "i32x4s lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			useLow: true,
+			v: i32x4(123, 123, 1, i32ToU32(-1)),
+			exp: i64x2(123, 123),
+		},
+		{
+			name: "i32x4u hi",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: false,
+			v: [16]byte{},
+			exp: [16]byte{},
+		},
+		{
+			name: "i32x4u hi",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: false,
+			v: i32x4(0, 0, 1, i32ToU32(-1)),
+			exp: i64x2(1, 4294967295),
+		},
+		{
+			name: "i32x4u hi",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: false,
+			v: i32x4(1, i32ToU32(-1), 0, 0),
+			exp: i64x2(0, 0),
+		},
+		{
+			name: "i32x4u hi",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: false,
+			v: i32x4(1, i32ToU32(-1), 123, 123),
+			exp: i64x2(123, 123),
+		},
+		{
+			name: "i32x4u lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: true,
+			v: [16]byte{},
+			exp: [16]byte{},
+		},
+		{
+			name: "i32x4u lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: true,
+			v: i32x4(1, i32ToU32(-1), 0, 0),
+			exp: i64x2(1, 4294967295),
+		},
+		{
+			name: "i32x4u lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: true,
+			v: i32x4(0, 0, 1, i32ToU32(-1)),
+			exp: i64x2(0, 0),
+		},
+		{
+			name: "i32x4u lo",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			useLow: true,
+			v: i32x4(123, 123, 1, i32ToU32(-1)),
+			exp: i64x2(123, 123),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Extend(&wazeroir.OperationV128Extend{
+				OriginShape: tc.shape, Signed: tc.signed, UseLow: tc.useLow,
+			})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
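The expected vectors in these extend tests follow directly from the WebAssembly `extend_{low,high}` semantics: one half of the input is widened lane by lane, with sign- or zero-extension. A minimal reference sketch of the i8x16-to-i16x8 case, independent of the compiler under test (the helper name and use of "encoding/binary" are illustrative, not part of the patch):

```go
// extendI8x16 widens one half of a 16-byte vector into eight 16-bit lanes.
// Sketch; assumes import "encoding/binary".
func extendI8x16(v [16]byte, signed, useLow bool) (res [16]byte) {
	half := v[8:]
	if useLow {
		half = v[:8]
	}
	for i := 0; i < 8; i++ {
		var w int16
		if signed {
			w = int16(int8(half[i])) // sign-extend
		} else {
			w = int16(half[i]) // zero-extend
		}
		binary.LittleEndian.PutUint16(res[i*2:], uint16(w))
	}
	return
}
```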
+
+func TestCompiler_compileV128Q15mulrSatS(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+
+	tests := []struct {
+		name        string
+		x1, x2, exp [16]byte
+	}{
+		{
+			name: "1",
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+		},
+		{
+			name: "2",
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+		},
+		{
+			name: "3",
+			x1: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			x2: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+		},
+		{
+			name: "4",
+			x1: i16x8(65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535),
+			x2: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+		},
+		{
+			name: "5",
+			x1: i16x8(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767),
+			x2: i16x8(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767),
+			exp: i16x8(32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x1[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x1[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x2[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x2[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Q15mulrSatS(&wazeroir.OperationV128Q15mulrSatS{})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
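The expected values above follow from the spec's q15mulr_sat_s definition: multiply the signed 16-bit lanes, add the rounding constant 2^14, arithmetic-shift right by 15, and saturate to the int16 range (the only overflowing input pair is -32768 x -32768). For example, case 5 is (32767*32767 + 16384) >> 15 = 32766. Per-lane sketch (helper name illustrative):

```go
// q15MulrSatS returns sat16((x*y + 2^14) >> 15).
func q15MulrSatS(x, y int16) int16 {
	v := (int32(x)*int32(y) + (1 << 14)) >> 15
	if v > 32767 { // only reachable for x == y == -32768
		return 32767
	}
	return int16(v)
}
```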
+
+func TestCompiler_compileFloatPromote(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+
+	tests := []struct {
+		name   string
+		v, exp [16]byte
+	}{
+		{
+			name: "1",
+			v: f32x4(float32(0x1.8f867ep+125), float32(0x1.8f867ep+125), float32(0x1.8f867ep+125), float32(0x1.8f867ep+125)),
+			exp: f64x2(6.6382536710104395e+37, 6.6382536710104395e+37),
+		},
+		{
+			name: "2",
+			v: f32x4(float32(0x1.8f867ep+125), float32(0x1.8f867ep+125), 0, 0),
+			exp: f64x2(6.6382536710104395e+37, 6.6382536710104395e+37),
+		},
+		{
+			name: "3",
+			v: f32x4(0, 0, float32(0x1.8f867ep+125), float32(0x1.8f867ep+125)),
+			exp: f64x2(0, 0),
+		},
+		{
+			name: "4",
+			v: f32x4(float32(math.NaN()), float32(math.NaN()), float32(0x1.8f867ep+125), float32(0x1.8f867ep+125)),
+			exp: f64x2(math.NaN(), math.NaN()),
+		},
+		{
+			name: "5",
+			v: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), float32(0x1.8f867ep+125), float32(0x1.8f867ep+125)),
+			exp: f64x2(math.Inf(1), math.Inf(-1)),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128FloatPromote(&wazeroir.OperationV128FloatPromote{})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			actualFs := [2]float64{
+				math.Float64frombits(lo), math.Float64frombits(hi),
+			}
+			expFs := [2]float64{
+				math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[:8])),
+				math.Float64frombits(binary.LittleEndian.Uint64(tc.exp[8:])),
+			}
+			for i := range expFs {
+				exp, actual := expFs[i], actualFs[i]
+				if math.IsNaN(exp) {
+					require.True(t, math.IsNaN(actual))
+				} else {
+					require.Equal(t, exp, actual)
+				}
+			}
+		})
+	}
+}
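f64x2.promote_low_f32x4 reads only the two low f32 lanes and widens them; the tests above confirm that the high lanes are ignored (case 3) and that NaN and the infinities survive promotion (cases 4 and 5). Reference semantics as a sketch (helper name illustrative):

```go
// floatPromoteLow widens the two low f32 lanes into two f64 lanes.
// Sketch; assumes imports "encoding/binary" and "math".
func floatPromoteLow(v [16]byte) (res [16]byte) {
	for i := 0; i < 2; i++ {
		f := math.Float32frombits(binary.LittleEndian.Uint32(v[i*4:]))
		binary.LittleEndian.PutUint64(res[i*8:], math.Float64bits(float64(f)))
	}
	return
}
```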
+
+func TestCompiler_compileV128FloatDemote(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+
+	tests := []struct {
+		name   string
+		v, exp [16]byte
+	}{
+		{
+			name: "1",
+			v: f64x2(0, 0),
+			exp: f32x4(0, 0, 0, 0),
+		},
+		{
+			name: "2",
+			v: f64x2(0x1.fffffe0000000p-127, 0x1.fffffe0000000p-127),
+			exp: f32x4(0x1p-126, 0x1p-126, 0, 0),
+		},
+		{
+			name: "3",
+			v: f64x2(0x1.fffffep+127, 0x1.fffffep+127),
+			exp: f32x4(0x1.fffffep+127, 0x1.fffffep+127, 0, 0),
+		},
+		{
+			name: "4",
+			v: f64x2(math.NaN(), math.NaN()),
+			exp: f32x4(float32(math.NaN()), float32(math.NaN()), 0, 0),
+		},
+		{
+			name: "5",
+			v: f64x2(math.Inf(1), math.Inf(-1)),
+			exp: f32x4(float32(math.Inf(1)), float32(math.Inf(-1)), 0, 0),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128FloatDemote(&wazeroir.OperationV128FloatDemote{})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			actualFs := [4]float32{
+				math.Float32frombits(uint32(lo)),
+				math.Float32frombits(uint32(lo >> 32)),
+				math.Float32frombits(uint32(hi)),
+				math.Float32frombits(uint32(hi >> 32)),
+			}
+			expFs := [4]float32{
+				math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[:4])),
+				math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[4:8])),
+				math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[8:12])),
+				math.Float32frombits(binary.LittleEndian.Uint32(tc.exp[12:])),
+			}
+			for i := range expFs {
+				exp, actual := expFs[i], actualFs[i]
+				if math.IsNaN(float64(exp)) {
+					require.True(t, math.IsNaN(float64(actual)))
+				} else {
+					require.Equal(t, exp, actual)
+				}
+			}
+		})
+	}
+}
+
+func TestCompiler_compileV128ExtAddPairwise(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+ t.Skip() + } + + tests := []struct { + name string + shape wazeroir.Shape + signed bool + v, exp [16]byte + }{ + { + name: "i8x16 s", + shape: wazeroir.ShapeI8x16, + signed: true, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i8x16 s", + shape: wazeroir.ShapeI8x16, + signed: true, + v: [16]byte{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + exp: i16x8(2, 2, 2, 2, 2, 2, 2, 2), + }, + { + name: "i8x16 s", + shape: wazeroir.ShapeI8x16, + signed: true, + v: [16]byte{ + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + }, + exp: i16x8( + i16ToU16(-2), i16ToU16(-2), i16ToU16(-2), i16ToU16(-2), + i16ToU16(-2), i16ToU16(-2), i16ToU16(-2), i16ToU16(-2), + ), + }, + { + name: "i8x16 s", + shape: wazeroir.ShapeI8x16, + signed: true, + v: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8( + i16ToU16(-256), i16ToU16(-256), i16ToU16(-256), i16ToU16(-256), + i16ToU16(-256), i16ToU16(-256), i16ToU16(-256), i16ToU16(-256), + ), + }, + { + name: "i8x16 u", + shape: wazeroir.ShapeI8x16, + signed: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i8x16 u", + shape: wazeroir.ShapeI8x16, + signed: false, + v: [16]byte{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + exp: i16x8(2, 2, 2, 2, 2, 2, 2, 2), + }, + { + name: "i8x16 u", + shape: wazeroir.ShapeI8x16, + signed: false, + v: [16]byte{ + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), i8ToU8(-1), + }, + exp: i16x8(510, 510, 510, 510, 510, 510, 510, 510), + }, + { + name: "i8x16 u", + shape: wazeroir.ShapeI8x16, + signed: false, + v: [16]byte{ + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), i8ToU8(-128), + }, + exp: i16x8(256, 256, 256, 256, 256, 256, 256, 256), + }, + { + name: "i16x8 s", + shape: wazeroir.ShapeI16x8, + signed: true, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8 s", + shape: wazeroir.ShapeI16x8, + signed: true, + v: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + exp: i32x4(2, 2, 2, 2), + }, + { + name: "i16x8 s", + shape: wazeroir.ShapeI16x8, + signed: true, + v: i16x8( + i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), + i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), + ), + exp: i32x4(i32ToU32(-2), i32ToU32(-2), i32ToU32(-2), i32ToU32(-2)), + }, + { + name: "i16x8 s", + shape: wazeroir.ShapeI16x8, + signed: true, + v: i16x8( + i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), + i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), + ), + exp: i32x4(i32ToU32(-65536), i32ToU32(-65536), i32ToU32(-65536), i32ToU32(-65536)), + }, + { + name: "i16x8 u", + shape: wazeroir.ShapeI16x8, + signed: false, + v: [16]byte{}, + exp: [16]byte{}, + }, + { + name: "i16x8 u", + shape: wazeroir.ShapeI16x8, + signed: false, + v: i16x8(1, 1, 1, 1, 1, 1, 1, 1), + exp: i32x4(2, 2, 2, 2), + }, + { + name: "i16x8 u", + shape: wazeroir.ShapeI16x8, + signed: false, + v: i16x8( + i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), + 
i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1),
+			),
+			exp: i32x4(131070, 131070, 131070, 131070),
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			v: i16x8(
+				i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768),
+				i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768), i16ToU16(-32768),
+			),
+			exp: i32x4(65536, 65536, 65536, 65536),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128ExtAddPairwise(&wazeroir.OperationV128ExtAddPairwise{
+				OriginShape: tc.shape, Signed: tc.signed,
+			})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
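Each expected vector above is the pairwise sum of adjacent lanes, extended to the doubled lane width: two 0xff bytes give -1 + -1 = -2 signed, but 255 + 255 = 510 unsigned, which is exactly what the cases encode. A sketch of the i8x16-to-i16x8 case (helper name illustrative):

```go
// extAddPairwiseI8x16 sums adjacent byte lanes into eight 16-bit lanes.
// Sketch; assumes import "encoding/binary".
func extAddPairwiseI8x16(v [16]byte, signed bool) (res [16]byte) {
	for i := 0; i < 8; i++ {
		var sum int16
		for _, b := range v[i*2 : i*2+2] {
			if signed {
				sum += int16(int8(b))
			} else {
				sum += int16(b)
			}
		}
		binary.LittleEndian.PutUint16(res[i*2:], uint16(sum))
	}
	return
}
```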
+
+func TestCompiler_compileV128Narrow(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+	tests := []struct {
+		name        string
+		shape       wazeroir.Shape
+		signed      bool
+		x1, x2, exp [16]byte
+	}{
+		{
+			name: "i16x8 s",
+			shape: wazeroir.ShapeI16x8,
+			signed: true,
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			exp: [16]byte{},
+		},
+		{
+			name: "i16x8 s",
+			shape: wazeroir.ShapeI16x8,
+			signed: true,
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			exp: [16]byte{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				1, 1, 1, 1, 1, 1, 1, 1,
+			},
+		},
+		{
+			name: "i16x8 s",
+			shape: wazeroir.ShapeI16x8,
+			signed: true,
+			x1: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			x2: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			exp: [16]byte{
+				1, 1, 1, 1, 1, 1, 1, 1,
+				0, 0, 0, 0, 0, 0, 0, 0,
+			},
+		},
+		{
+			name: "i16x8 s",
+			shape: wazeroir.ShapeI16x8,
+			signed: true,
+			x1: i16x8(i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0),
+			x2: i16x8(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff),
+			exp: [16]byte{
+				0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+				0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			},
+		},
+		{
+			name: "i16x8 s",
+			shape: wazeroir.ShapeI16x8,
+			signed: true,
+			x1: i16x8(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff),
+			x2: i16x8(i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0),
+			exp: [16]byte{
+				0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+				0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
+			},
+		},
+		// unsigned
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			exp: [16]byte{},
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			exp: [16]byte{
+				0, 0, 0, 0, 0, 0, 0, 0,
+				1, 1, 1, 1, 1, 1, 1, 1,
+			},
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(1, 1, 1, 1, 1, 1, 1, 1),
+			x2: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			exp: [16]byte{
+				1, 1, 1, 1, 1, 1, 1, 1,
+				0, 0, 0, 0, 0, 0, 0, 0,
+			},
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0),
+			x2: i16x8(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff),
+			exp: [16]byte{
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff),
+			x2: i16x8(i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0, i16ToU16(-0x8000), 0),
+			exp: [16]byte{
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(i16ToU16(-1), 0, i16ToU16(-1), 0, i16ToU16(-1), 0, i16ToU16(-1), 0),
+			x2: i16x8(0, 0x100, 0, 0x100, 0, 0x100, 0, 0x100),
+			exp: [16]byte{
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+				0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
+			},
+		},
+		{
+			name: "i16x8 u",
+			shape: wazeroir.ShapeI16x8,
+			signed: false,
+			x1: i16x8(0, 0x100, 0, 0x100, 0, 0x100, 0, 0x100),
+			x2: i16x8(i16ToU16(-1), 0, i16ToU16(-1), 0, i16ToU16(-1), 0, i16ToU16(-1), 0),
+			exp: [16]byte{
+				0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+		},
+		{
+			name: "i32x4 s",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			x1: i32x4(0, 0, 0, 0),
+			x2: i32x4(0, 0, 0, 0),
+			exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+		},
+		{
+			name: "i32x4 s",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			x1: i32x4(0, 0, 0, 0),
+			x2: i32x4(1, 1, 1, 1),
+			exp: i16x8(0, 0, 0, 0, 1, 1, 1, 1),
+		},
+		{
+			name: "i32x4 s",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			x1: i32x4(1, 1, 1, 1),
+			x2: i32x4(0, 0, 0, 0),
+			exp: i16x8(1, 1, 1, 1, 0, 0, 0, 0),
+		},
+		{
+			name: "i32x4 s",
+			shape: wazeroir.ShapeI32x4,
+			signed: true,
+			x1: i32x4(0x8000, 0x8000, 0x7fff, 0x7fff),
+			x2: i32x4(0x7fff, 0x7fff, 0x8000, 0x8000),
+			exp: i16x8(0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff),
+		},
+		{
+			name: "i32x4 u",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			x1: i32x4(0, 0, 0, 0),
+			x2: i32x4(0, 0, 0, 0),
+			exp: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+		},
+		{
+			name: "i32x4 u",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			x1: i32x4(0, 0, 0, 0),
+			x2: i32x4(1, 1, 1, 1),
+			exp: i16x8(0, 0, 0, 0, 1, 1, 1, 1),
+		},
+		{
+			name: "i32x4 u",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			x1: i32x4(1, 1, 1, 1),
+			x2: i32x4(0, 0, 0, 0),
+			exp: i16x8(1, 1, 1, 1, 0, 0, 0, 0),
+		},
+		{
+			name: "i32x4 u",
+			shape: wazeroir.ShapeI32x4,
+			signed: false,
+			x1: i32x4(0x8000, 0x8000, 0x7fff, 0x7fff),
+			x2: i32x4(0x7fff, 0x7fff, 0x8000, 0x8000),
+			exp: i16x8(0x8000, 0x8000, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x8000, 0x8000),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x1[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x1[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x2[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x2[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Narrow(&wazeroir.OperationV128Narrow{
+				OriginShape: tc.shape, Signed: tc.signed,
+			})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
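Narrow concatenates the saturated lanes of x1 (low half of the result) and x2 (high half), which is what the PACK* family provides; note that the unsigned form saturates negative inputs to zero, as the 0x8000/0xffff cases above show. Per-lane sketch for the i16x8-to-i8x16 case (helper name illustrative):

```go
// narrowI16x8 packs two i16x8 vectors into one i8x16 with saturation.
// Sketch; assumes import "encoding/binary".
func narrowI16x8(x1, x2 [16]byte, signed bool) (res [16]byte) {
	sat := func(v int16) byte {
		lo, hi := int16(0), int16(255) // unsigned target range
		if signed {
			lo, hi = -128, 127
		}
		if v < lo {
			v = lo
		} else if v > hi {
			v = hi
		}
		return byte(v)
	}
	for i := 0; i < 8; i++ {
		res[i] = sat(int16(binary.LittleEndian.Uint16(x1[i*2:])))
		res[i+8] = sat(int16(binary.LittleEndian.Uint16(x2[i*2:])))
	}
	return
}
```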
+
+func TestCompiler_compileV128FConvertFromI(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+
+	tests := []struct {
+		name      string
+		destShape wazeroir.Shape
+		signed    bool
+		v, exp    [16]byte
+	}{
+		{
+			name: "f32x4 s",
+			destShape: wazeroir.ShapeF32x4,
+			signed: true,
+			v: i32x4(0, 0, 0, 0),
+			exp: f32x4(0, 0, 0, 0),
+		},
+		{
+			name: "f32x4 s",
+			destShape: wazeroir.ShapeF32x4,
+			signed: true,
+			v: i32x4(1, 0, 2, 3),
+			exp: f32x4(1, 0, 2.0, 3),
+		},
+		{
+			name: "f32x4 s",
+			destShape: wazeroir.ShapeF32x4,
+			signed: true,
+			v: i32x4(1234567890, i32ToU32(-2147483648.0), 2147483647, 1234567890),
+			exp: f32x4(0x1.26580cp+30, -2147483648.0, 2147483647, 0x1.26580cp+30),
+		},
+		{
+			name: "f32x4 u",
+			destShape: wazeroir.ShapeF32x4,
+			signed: false,
+			v: i32x4(0, 0, 0, 0),
+			exp: f32x4(0, 0, 0, 0),
+		},
+		{
+			name: "f32x4 u",
+			destShape: wazeroir.ShapeF32x4,
+			signed: false,
+			v: i32x4(1, 0, 2, 3),
+			exp: f32x4(1, 0, 2.0, 3),
+		},
+		{
+			name: "f32x4 u",
+			destShape: wazeroir.ShapeF32x4,
+			signed: false,
+			v: i32x4(2147483647, i32ToU32(-2147483648.0), 2147483647, i32ToU32(-1)),
+			exp: f32x4(2147483648.0, 2147483648.0, 2147483648.0, 4294967295.0),
+		},
+		{
+			name: "f64x2 s",
+			destShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: i32x4(0, 0, 0, 0),
+			exp: f64x2(0, 0),
+		},
+		{
+			name: "f64x2 s",
+			destShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: i32x4(0, 0, i32ToU32(-1), i32ToU32(-32)),
+			exp: f64x2(0, 0),
+		},
+		{
+			name: "f64x2 s",
+			destShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: i32x4(2147483647, i32ToU32(-2147483648), 0, 0),
+			exp: f64x2(2147483647, -2147483648),
+		},
+		{
+			name: "f64x2 u",
+			destShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: i32x4(0, 0, 0, 0),
+			exp: f64x2(0, 0),
+		},
+		{
+			name: "f64x2 u",
+			destShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: i32x4(0, 0, i32ToU32(-1), i32ToU32(-32)),
+			exp: f64x2(0, 0),
+		},
+		{
+			name: "f64x2 u",
+			destShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: i32x4(2147483647, i32ToU32(-2147483648), 0, 0),
+			exp: f64x2(2147483647, 2147483648),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128FConvertFromI(&wazeroir.OperationV128FConvertFromI{
+				DestinationShape: tc.destShape,
+				Signed: tc.signed,
+			})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
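The f32x4 form converts one float per integer lane, while the f64x2 form converts only the two low i32 lanes (the -1/-32 lanes are ignored in the cases above); the signed/unsigned split is visible in how the bit pattern of -2147483648 converts. Sketch of the f64x2 path (helper name illustrative):

```go
// fConvertLowI32x4ToF64x2 converts the two low i32 lanes to f64 lanes.
// Sketch; assumes imports "encoding/binary" and "math".
func fConvertLowI32x4ToF64x2(v [16]byte, signed bool) (res [16]byte) {
	for i := 0; i < 2; i++ {
		u := binary.LittleEndian.Uint32(v[i*4:])
		f := float64(u)
		if signed {
			f = float64(int32(u))
		}
		binary.LittleEndian.PutUint64(res[i*8:], math.Float64bits(f))
	}
	return
}
```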
+
+func TestCompiler_compileV128Dot(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+
+	tests := []struct {
+		name        string
+		x1, x2, exp [16]byte
+	}{
+		{
+			name: "1",
+			x1: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			x2: i16x8(0, 0, 0, 0, 0, 0, 0, 0),
+			exp: i32x4(0, 0, 0, 0),
+		},
+		{
+			name: "2",
+			x1: i16x8(1, 1, 1, 1, i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1)),
+			x2: i16x8(i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), i16ToU16(-1), 2, 2, 2, 2),
+			exp: i32x4(i32ToU32(-2), i32ToU32(-2), i32ToU32(-4), i32ToU32(-4)),
+		},
+		{
+			name: "3",
+			x1: i16x8(65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535),
+			x2: i16x8(65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535),
+			exp: i32x4(2, 2, 2, 2),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x2[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x2[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.x1[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.x1[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Dot(&wazeroir.OperationV128Dot{})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.exp, actual)
+		})
+	}
+}
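i32x4.dot_i16x8_s is exactly PMADDWD: multiply corresponding signed 16-bit lanes and add each adjacent pair of 32-bit products. That is why eight 0xffff lanes (-1 as i16) squared and pair-summed give 2 per lane in case 3. Per-lane sketch (helper name illustrative):

```go
// dotI16x8S computes i32x4.dot_i16x8_s over two 16-byte vectors.
// Sketch; assumes import "encoding/binary".
func dotI16x8S(x1, x2 [16]byte) (res [16]byte) {
	for i := 0; i < 4; i++ {
		var sum int32
		for j := 0; j < 2; j++ {
			a := int16(binary.LittleEndian.Uint16(x1[(i*2+j)*2:]))
			b := int16(binary.LittleEndian.Uint16(x2[(i*2+j)*2:]))
			sum += int32(a) * int32(b)
		}
		binary.LittleEndian.PutUint32(res[i*4:], uint32(sum))
	}
	return
}
```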
+
+func TestCompiler_compileV128ITruncSatFromF(t *testing.T) {
+	if runtime.GOARCH != "amd64" {
+		// TODO: implement on arm64.
+		t.Skip()
+	}
+
+	tests := []struct {
+		name        string
+		originShape wazeroir.Shape
+		signed      bool
+		v, exp      [16]byte
+	}{
+		{
+			name: "f32x4 s",
+			originShape: wazeroir.ShapeF32x4,
+			signed: true,
+			v: i32x4(0, 0, 0, 0),
+			exp: f32x4(0, 0, 0, 0),
+		},
+		{
+			name: "f32x4 s",
+			originShape: wazeroir.ShapeF32x4,
+			signed: true,
+			v: f32x4(1.5, -1.9, -1.9, 1.5),
+			exp: i32x4(1, i32ToU32(-1), i32ToU32(-1), 1),
+		},
+		{
+			name: "f32x4 s",
+			originShape: wazeroir.ShapeF32x4,
+			signed: true,
+			v: f32x4(float32(math.NaN()), -4294967294.0, float32(math.Inf(-1)), float32(math.Inf(1))),
+			exp: i32x4(0, i32ToU32(-2147483648), i32ToU32(-2147483648), 2147483647),
+		},
+		{
+			name: "f32x4 u",
+			originShape: wazeroir.ShapeF32x4,
+			signed: false,
+			v: i32x4(0, 0, 0, 0),
+			exp: f32x4(0, 0, 0, 0),
+		},
+		{
+			name: "f32x4 u",
+			originShape: wazeroir.ShapeF32x4,
+			signed: false,
+			v: f32x4(1.5, -1.9, -1.9, 1.5),
+			exp: i32x4(1, 0, 0, 1),
+		},
+		{
+			name: "f32x4 u",
+			originShape: wazeroir.ShapeF32x4,
+			signed: false,
+			v: f32x4(float32(math.NaN()), -4294967294.0, 4294967294.0, float32(math.Inf(1))),
+			exp: i32x4(0, 0, 4294967295, 4294967295),
+		},
+		{
+			name: "f64x2 s",
+			originShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: f64x2(0, 0),
+			exp: i32x4(0, 0, 0, 0),
+		},
+		{
+			name: "f64x2 s",
+			originShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: f64x2(5.123, -2.0),
+			exp: i32x4(5, i32ToU32(-2), 0, 0),
+		},
+		{
+			name: "f64x2 s",
+			originShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: f64x2(math.NaN(), math.Inf(1)),
+			exp: i32x4(0, 2147483647, 0, 0),
+		},
+		{
+			name: "f64x2 s",
+			originShape: wazeroir.ShapeF64x2,
+			signed: true,
+			v: f64x2(math.Inf(-1), 4294967294.0),
+			exp: i32x4(i32ToU32(-2147483648), 2147483647, 0, 0),
+		},
+		{
+			name: "f64x2 u",
+			originShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: f64x2(0, 0),
+			exp: i32x4(0, 0, 0, 0),
+		},
+		{
+			name: "f64x2 u",
+			originShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: f64x2(5.123, -2.0),
+			exp: i32x4(5, 0, 0, 0),
+		},
+		{
+			name: "f64x2 u",
+			originShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: f64x2(math.NaN(), math.Inf(1)),
+			exp: i32x4(0, 4294967295, 0, 0),
+		},
+		{
+			name: "f64x2 u",
+			originShape: wazeroir.ShapeF64x2,
+			signed: false,
+			v: f64x2(math.Inf(-1), 4294967296.0),
+			exp: i32x4(0, 4294967295, 0, 0),
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{
+				Lo: binary.LittleEndian.Uint64(tc.v[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.v[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128ITruncSatFromF(&wazeroir.OperationV128ITruncSatFromF{
+				OriginShape: tc.originShape,
+				Signed: tc.signed,
+			})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp)
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters))
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+ code, _, _, err := compiler.compile() + require.NoError(t, err) + env.exec(code) + + require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode) + + lo, hi := env.stackTopAsV128() + var actual [16]byte + binary.LittleEndian.PutUint64(actual[:8], lo) + binary.LittleEndian.PutUint64(actual[8:], hi) + require.Equal(t, tc.exp, actual) + }) + } +} diff --git a/internal/engine/compiler/engine.go b/internal/engine/compiler/engine.go index fbf7c07809d..e68c5da181f 100644 --- a/internal/engine/compiler/engine.go +++ b/internal/engine/compiler/engine.go @@ -148,14 +148,14 @@ type ( // Where we store the status code of Compiler execution. statusCode nativeCallStatusCode - // Set when statusCode == compilerStatusCallBuiltInFunction} + // Set when statusCode == compilerStatusCallBuiltInFunction // Indicating the function call index. builtinFunctionCallIndex wasm.Index } // callFrame holds the information to which the caller function can return. // callFrame is created for currently executed function frame as well, - // so some of the fields are not yet set when native code is currently executing it. + // so some fields are not yet set when native code is currently executing it. // That is, callFrameTop().returnAddress or returnStackBasePointer are not set // until it makes a function call. callFrame struct { @@ -187,8 +187,8 @@ type ( parent *code } - // code corresponds to a function in a module (not insantaited one). This holds the machine code - // compiled by Wazero's compiler. + // code corresponds to a function in a module (not instantiated one). This holds the machine code + // compiled by wazero compiler. code struct { // codeSegment is holding the compiled native code as a byte slice. codeSegment []byte @@ -203,7 +203,7 @@ type ( sourceModule *wasm.Module } - // staticData holds the read-only data (i.e. out side of codeSegment which is marked as executable) per function. + // staticData holds the read-only data (i.e. outside codeSegment which is marked as executable) per function. // This is used to store jump tables for br_table instructions. // The primary index is the logical separation of multiple data, for example data[0] and data[1] // correspond to different jump tables for different br_table instructions. @@ -250,7 +250,7 @@ const ( callEngineValueStackContextStackBasePointerOffset = 120 // Offsets for callEngine exitContext. - callEngineExitContextnativeCallStatusCodeOffset = 128 + callEngineExitContextNativeCallStatusCodeOffset = 128 callEngineExitContextBuiltinFunctionCallAddressOffset = 132 // Offsets for callFrame. @@ -294,10 +294,10 @@ const ( // https://github.com/golang/go/blob/release-branch.go1.17/src/runtime/runtime2.go#L207-L210 interfaceDataOffset = 8 - // Consts for DataInstance. + // Consts for wasm.DataInstance. dataInstanceStructSize = 24 - // Consts for ElementInstance. + // Consts for wasm.ElementInstance. elementInstanceStructSize = 32 // pointerSizeLog2 satisfies: 1 << pointerSizeLog2 = sizeOf(uintptr) @@ -330,7 +330,7 @@ const ( nativeCallStatusIntegerDivisionByZero ) -// causePanic causes a panic with the corresponding error to the status code. +// causePanic causes a panic with the corresponding error to the nativeCallStatusCode. 
func (s nativeCallStatusCode) causePanic() { var err error switch s { @@ -495,9 +495,9 @@ func (e *engine) NewModuleEngine(name string, module *wasm.Module, importedFunct return me, wasm.ErrElementOffsetOutOfBounds } - for i, funcindex := range init.FunctionIndexes { - if funcindex != nil { - references[init.Offset+uint32(i)] = uintptr(unsafe.Pointer(me.functions[*funcindex])) + for i, funcIdx := range init.FunctionIndexes { + if funcIdx != nil { + references[init.Offset+uint32(i)] = uintptr(unsafe.Pointer(me.functions[*funcIdx])) } } } @@ -820,7 +820,7 @@ func (ce *callEngine) builtinFunctionMemoryGrow(ctx context.Context, mem *wasm.M func (ce *callEngine) builtinFunctionTableGrow(ctx context.Context, tables []*wasm.TableInstance) { tableIndex := ce.popValue() - table := tables[tableIndex] // verifed not to be out of range by the func validation at compilation phase. + table := tables[tableIndex] // verified not to be out of range by the func validation at compilation phase. num := ce.popValue() ref := ce.popValue() res := table.Grow(ctx, uint32(num), uintptr(ref)) @@ -1094,6 +1094,60 @@ func compileWasmFunction(_ wasm.Features, ir *wazeroir.CompilationResult) (*code err = compiler.compileV128Shl(o) case *wazeroir.OperationV128Cmp: err = compiler.compileV128Cmp(o) + case *wazeroir.OperationV128AddSat: + err = compiler.compileV128AddSat(o) + case *wazeroir.OperationV128SubSat: + err = compiler.compileV128SubSat(o) + case *wazeroir.OperationV128Mul: + err = compiler.compileV128Mul(o) + case *wazeroir.OperationV128Div: + err = compiler.compileV128Div(o) + case *wazeroir.OperationV128Neg: + err = compiler.compileV128Neg(o) + case *wazeroir.OperationV128Sqrt: + err = compiler.compileV128Sqrt(o) + case *wazeroir.OperationV128Abs: + err = compiler.compileV128Abs(o) + case *wazeroir.OperationV128Popcnt: + err = compiler.compileV128Popcnt(o) + case *wazeroir.OperationV128Min: + err = compiler.compileV128Min(o) + case *wazeroir.OperationV128Max: + err = compiler.compileV128Max(o) + case *wazeroir.OperationV128AvgrU: + err = compiler.compileV128AvgrU(o) + case *wazeroir.OperationV128Pmin: + err = compiler.compileV128Pmin(o) + case *wazeroir.OperationV128Pmax: + err = compiler.compileV128Pmax(o) + case *wazeroir.OperationV128Ceil: + err = compiler.compileV128Ceil(o) + case *wazeroir.OperationV128Floor: + err = compiler.compileV128Floor(o) + case *wazeroir.OperationV128Trunc: + err = compiler.compileV128Trunc(o) + case *wazeroir.OperationV128Nearest: + err = compiler.compileV128Nearest(o) + case *wazeroir.OperationV128Extend: + err = compiler.compileV128Extend(o) + case *wazeroir.OperationV128ExtMul: + err = compiler.compileV128ExtMul(o) + case *wazeroir.OperationV128Q15mulrSatS: + err = compiler.compileV128Q15mulrSatS(o) + case *wazeroir.OperationV128ExtAddPairwise: + err = compiler.compileV128ExtAddPairwise(o) + case *wazeroir.OperationV128FloatPromote: + err = compiler.compileV128FloatPromote(o) + case *wazeroir.OperationV128FloatDemote: + err = compiler.compileV128FloatDemote(o) + case *wazeroir.OperationV128FConvertFromI: + err = compiler.compileV128FConvertFromI(o) + case *wazeroir.OperationV128Dot: + err = compiler.compileV128Dot(o) + case *wazeroir.OperationV128Narrow: + err = compiler.compileV128Narrow(o) + case *wazeroir.OperationV128ITruncSatFromF: + err = compiler.compileV128ITruncSatFromF(o) default: err = errors.New("unsupported") } diff --git a/internal/engine/compiler/engine_test.go b/internal/engine/compiler/engine_test.go index 6ea3bfa0db4..2cf04e0da30 100644 --- 
a/internal/engine/compiler/engine_test.go +++ b/internal/engine/compiler/engine_test.go @@ -46,7 +46,7 @@ func TestCompiler_VerifyOffsetValue(t *testing.T) { require.Equal(t, int(unsafe.Offsetof(ce.stackBasePointer)), callEngineValueStackContextStackBasePointerOffset) // Offsets for callEngine.exitContext. - require.Equal(t, int(unsafe.Offsetof(ce.statusCode)), callEngineExitContextnativeCallStatusCodeOffset) + require.Equal(t, int(unsafe.Offsetof(ce.statusCode)), callEngineExitContextNativeCallStatusCodeOffset) require.Equal(t, int(unsafe.Offsetof(ce.builtinFunctionCallIndex)), callEngineExitContextBuiltinFunctionCallAddressOffset) // Size and offsets for callFrame. diff --git a/internal/engine/compiler/impl_amd64.go b/internal/engine/compiler/impl_amd64.go index c0c97968266..81cd9cb93dc 100644 --- a/internal/engine/compiler/impl_amd64.go +++ b/internal/engine/compiler/impl_amd64.go @@ -1814,33 +1814,7 @@ func (c *amd64Compiler) compileTrunc(o *wazeroir.OperationTrunc) error { // compileNearest implements compiler.compileNearest for the amd64 architecture. func (c *amd64Compiler) compileNearest(o *wazeroir.OperationNearest) error { - // Internally, nearest can be performed via ROUND instruction with 0x00 mode. - // If we compile the following Wat by "wasmtime wasm2obj", - // - // (module - // (func (export "nearest_f32") (param $x f32) (result f32) (f32.nearest (local.get $x))) - // (func (export "nearest_f64") (param $x f64) (result f64) (f64.nearest (local.get $x))) - // ) - // - // we see a disassemble of the object via "objdump --disassemble-all" like: - // - // 0000000000000000 <_wasm_function_0>: - // 0: 55 push %rbp - // 1: 48 89 e5 mov %rsp,%rbp - // 4: 66 0f 3a 0a c0 00 roundss $0x0,%xmm0,%xmm0 - // a: 48 89 ec mov %rbp,%rsp - // d: 5d pop %rbp - // e: c3 retq - // - // 000000000000000f <_wasm_function_1>: - // f: 55 push %rbp - // 10: 48 89 e5 mov %rsp,%rbp - // 13: 66 0f 3a 0b c0 00 roundsd $0x0,%xmm0,%xmm0 - // 19: 48 89 ec mov %rbp,%rsp - // 1c: 5d pop %rbp - // 1d: c3 retq - // - // Below, we use the same implementation: "rounds{s,d} $0x0,%xmm0,%xmm0" where the mode is set to zero. + // Nearest can be performed via ROUND instruction with 0x00 mode. return c.compileRoundInstruction(o.Type == wazeroir.Float32, 0x00) } @@ -1886,7 +1860,7 @@ func (c *amd64Compiler) compileMax(o *wazeroir.OperationMax) error { // Native min/max instructions return non-NaN value if exactly one of target values // is NaN. For example native_{min,max}(5.0, NaN) returns always 5.0, not NaN. // However, WebAssembly specifies that min/max must always return NaN if one of values is NaN. -// Therefore in this function, we have to add conditional jumps to check if one of values is NaN before +// Therefore, in this function, we have to add conditional jumps to check if one of values is NaN before // the native min/max, which is why we cannot simply emit a native min/max instruction here. // // For the semantics, see wazeroir.Min and wazeroir.Max for detail. @@ -4675,7 +4649,7 @@ func (c *amd64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) } func (c *amd64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) { - c.assembler.CompileConstToMemory(amd64.MOVB, int64(status), amd64ReservedRegisterForCallEngine, callEngineExitContextnativeCallStatusCodeOffset) + c.assembler.CompileConstToMemory(amd64.MOVB, int64(status), amd64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) // Write back the cached SP to the actual eng.stackPointer. 
c.assembler.CompileConstToMemory(amd64.MOVQ, int64(c.locationStack.sp), amd64ReservedRegisterForCallEngine, callEngineValueStackContextStackPointerOffset) diff --git a/internal/engine/compiler/impl_arm64.go b/internal/engine/compiler/impl_arm64.go index 9ffb4ebf7e9..91fd60cd363 100644 --- a/internal/engine/compiler/impl_arm64.go +++ b/internal/engine/compiler/impl_arm64.go @@ -396,10 +396,10 @@ func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) { if status != 0 { c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary) - c.assembler.CompileRegisterToMemory(arm64.MOVWU, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineExitContextnativeCallStatusCodeOffset) + c.assembler.CompileRegisterToMemory(arm64.MOVWU, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) } else { // If the status == 0, we use zero register to store zero. - c.assembler.CompileRegisterToMemory(arm64.MOVWU, arm64.RegRZR, arm64ReservedRegisterForCallEngine, callEngineExitContextnativeCallStatusCodeOffset) + c.assembler.CompileRegisterToMemory(arm64.MOVWU, arm64.RegRZR, arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) } // The return address to the Go code is stored in archContext.compilerReturnAddress which diff --git a/internal/engine/compiler/impl_vec_amd64.go b/internal/engine/compiler/impl_vec_amd64.go index d280faa0d77..c2c74956d4c 100644 --- a/internal/engine/compiler/impl_vec_amd64.go +++ b/internal/engine/compiler/impl_vec_amd64.go @@ -1,6 +1,8 @@ package compiler import ( + "errors" + "github.com/tetratelabs/wazero/internal/asm" "github.com/tetratelabs/wazero/internal/asm/amd64" "github.com/tetratelabs/wazero/internal/wazeroir" @@ -60,7 +62,7 @@ func (c *amd64Compiler) compileV128Add(o *wazeroir.OperationV128Add) error { case wazeroir.ShapeI16x8: inst = amd64.PADDW case wazeroir.ShapeI32x4: - inst = amd64.PADDL + inst = amd64.PADDD case wazeroir.ShapeI64x2: inst = amd64.PADDQ case wazeroir.ShapeF32x4: @@ -93,7 +95,7 @@ func (c *amd64Compiler) compileV128Sub(o *wazeroir.OperationV128Sub) error { case wazeroir.ShapeI16x8: inst = amd64.PSUBW case wazeroir.ShapeI32x4: - inst = amd64.PSUBL + inst = amd64.PSUBD case wazeroir.ShapeI64x2: inst = amd64.PSUBQ case wazeroir.ShapeF32x4: @@ -116,21 +118,21 @@ func (c *amd64Compiler) compileV128Load(o *wazeroir.OperationV128Load) error { } switch o.Type { - case wazeroir.LoadV128Type128: + case wazeroir.V128LoadType128: err = c.compileV128LoadImpl(amd64.MOVDQU, o.Arg.Offset, 16, result) - case wazeroir.LoadV128Type8x8s: + case wazeroir.V128LoadType8x8s: err = c.compileV128LoadImpl(amd64.PMOVSXBW, o.Arg.Offset, 8, result) - case wazeroir.LoadV128Type8x8u: + case wazeroir.V128LoadType8x8u: err = c.compileV128LoadImpl(amd64.PMOVZXBW, o.Arg.Offset, 8, result) - case wazeroir.LoadV128Type16x4s: + case wazeroir.V128LoadType16x4s: err = c.compileV128LoadImpl(amd64.PMOVSXWD, o.Arg.Offset, 8, result) - case wazeroir.LoadV128Type16x4u: + case wazeroir.V128LoadType16x4u: err = c.compileV128LoadImpl(amd64.PMOVZXWD, o.Arg.Offset, 8, result) - case wazeroir.LoadV128Type32x2s: + case wazeroir.V128LoadType32x2s: err = c.compileV128LoadImpl(amd64.PMOVSXDQ, o.Arg.Offset, 8, result) - case wazeroir.LoadV128Type32x2u: + case wazeroir.V128LoadType32x2u: err = c.compileV128LoadImpl(amd64.PMOVZXDQ, o.Arg.Offset, 8, result) - case wazeroir.LoadV128Type8Splat: + case 
wazeroir.V128LoadType8Splat: reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, 1) if err != nil { return err @@ -148,7 +150,7 @@ func (c *amd64Compiler) compileV128Load(o *wazeroir.OperationV128Load) error { c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRB, reg, result, 0) c.assembler.CompileRegisterToRegister(amd64.PXOR, tmpVReg, tmpVReg) c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmpVReg, result) - case wazeroir.LoadV128Type16Splat: + case wazeroir.V128LoadType16Splat: reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, 2) if err != nil { return err @@ -161,7 +163,7 @@ func (c *amd64Compiler) compileV128Load(o *wazeroir.OperationV128Load) error { c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, reg, result, 0) c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRW, reg, result, 1) c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0) - case wazeroir.LoadV128Type32Splat: + case wazeroir.V128LoadType32Splat: reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, 4) if err != nil { return err @@ -172,7 +174,7 @@ func (c *amd64Compiler) compileV128Load(o *wazeroir.OperationV128Load) error { // pshufd $0, result, result (result = result[0,0,0,0]) c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRD, reg, result, 0) c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, result, result, 0) - case wazeroir.LoadV128Type64Splat: + case wazeroir.V128LoadType64Splat: reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, 8) if err != nil { return err @@ -183,9 +185,9 @@ func (c *amd64Compiler) compileV128Load(o *wazeroir.OperationV128Load) error { // pinsrq $1, reg, result c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, reg, result, 0) c.assembler.CompileRegisterToRegisterWithArg(amd64.PINSRQ, reg, result, 1) - case wazeroir.LoadV128Type32zero: + case wazeroir.V128LoadType32zero: err = c.compileV128LoadImpl(amd64.MOVL, o.Arg.Offset, 4, result) - case wazeroir.LoadV128Type64zero: + case wazeroir.V128LoadType64zero: err = c.compileV128LoadImpl(amd64.MOVQ, o.Arg.Offset, 8, result) } @@ -827,7 +829,7 @@ func (c *amd64Compiler) compileV128ShrImpl(o *wazeroir.OperationV128Shr) error { return nil } -// compileV128ShrI64x2SignedImpl implements compiler.compileV128Shr for i64x4 signed (arithmetic) shift. +// compileV128ShrI64x2SignedImpl implements compiler.compileV128Shr for i64x2 signed (arithmetic) shift. // PSRAQ instruction requires AVX, so we emulate it without AVX instructions. https://www.felixcloutier.com/x86/psraw:psrad:psraq func (c *amd64Compiler) compileV128ShrI64x2SignedImpl() error { const shiftCountRegister = amd64.RegCX @@ -1330,3 +1332,1375 @@ func (c *amd64Compiler) compileV128Cmp(o *wazeroir.OperationV128Cmp) error { c.pushVectorRuntimeValueLocationOnRegister(result) return nil } + +// compileV128AddSat implements compiler.compileV128AddSat for amd64. 
+func (c *amd64Compiler) compileV128AddSat(o *wazeroir.OperationV128AddSat) error {
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeI8x16:
+		if o.Signed {
+			inst = amd64.PADDSB
+		} else {
+			inst = amd64.PADDUSB
+		}
+	case wazeroir.ShapeI16x8:
+		if o.Signed {
+			inst = amd64.PADDSW
+		} else {
+			inst = amd64.PADDUSW
+		}
+	}
+
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+// compileV128SubSat implements compiler.compileV128SubSat for amd64.
+func (c *amd64Compiler) compileV128SubSat(o *wazeroir.OperationV128SubSat) error {
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeI8x16:
+		if o.Signed {
+			inst = amd64.PSUBSB
+		} else {
+			inst = amd64.PSUBUSB
+		}
+	case wazeroir.ShapeI16x8:
+		if o.Signed {
+			inst = amd64.PSUBSW
+		} else {
+			inst = amd64.PSUBUSW
+		}
+	}
+
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+// compileV128Mul implements compiler.compileV128Mul for amd64.
+func (c *amd64Compiler) compileV128Mul(o *wazeroir.OperationV128Mul) error {
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeI16x8:
+		inst = amd64.PMULLW
+	case wazeroir.ShapeI32x4:
+		inst = amd64.PMULLD
+	case wazeroir.ShapeI64x2:
+		return c.compileV128MulI64x2()
+	case wazeroir.ShapeF32x4:
+		inst = amd64.MULPS
+	case wazeroir.ShapeF64x2:
+		inst = amd64.MULPD
+	}
+
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
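The saturating variants selected above (PADDS*/PADDUS* and PSUBS*/PSUBUS*) clamp the exact result to the lane's range instead of wrapping. Per-lane semantics for the signed byte case, as a sketch (helper name illustrative):

```go
// addSatI8 is the per-lane semantics of PADDSB: add and clamp to [-128, 127].
func addSatI8(a, b int8) int8 {
	s := int16(a) + int16(b)
	if s > 127 {
		return 127
	}
	if s < -128 {
		return -128
	}
	return int8(s)
}
```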
+
+// compileV128MulI64x2 implements V128Mul for i64x2.
+func (c *amd64Compiler) compileV128MulI64x2() error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	tmp1, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	c.locationStack.markRegisterUsed(tmp1)
+
+	tmp2, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	// Assuming that we have
+	//	x1r = [p1, p2] = [p1_lo, p1_hi, p2_lo, p2_hi]
+	//	x2r = [q1, q2] = [q1_lo, q1_hi, q2_lo, q2_hi]
+	// where pN and qN are 64-bit (quad word) lanes, and pN_lo, pN_hi, qN_lo and qN_hi are 32-bit (double word) lanes.
+
+	// Copy x1's value into tmp1.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp1)
+	// And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, p1_hi, 0, p2_hi]
+	c.assembler.CompileConstToRegister(amd64.PSRLQ, 32, tmp1)
+
+	// Execute "pmuludq x2r,tmp1", which makes tmp1 = [p1_hi*q1_lo, p2_hi*q2_lo] where each lane is 64-bit.
+	c.assembler.CompileRegisterToRegister(amd64.PMULUDQ, x2r, tmp1)
+
+	// Copy x2's value into tmp2.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x2r, tmp2)
+	// And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, q1_hi, 0, q2_hi]
+	c.assembler.CompileConstToRegister(amd64.PSRLQ, 32, tmp2)
+
+	// Execute "pmuludq x1r,tmp2", which makes tmp2 = [p1_lo*q1_hi, p2_lo*q2_hi] where each lane is 64-bit.
+	c.assembler.CompileRegisterToRegister(amd64.PMULUDQ, x1r, tmp2)
+
+	// Add tmp1 and tmp2, then do the logical left shift by 32-bit,
+	// which makes tmp1 = [(p1_lo*q1_hi+p1_hi*q1_lo)<<32, (p2_lo*q2_hi+p2_hi*q2_lo)<<32]
+	c.assembler.CompileRegisterToRegister(amd64.PADDQ, tmp2, tmp1)
+	c.assembler.CompileConstToRegister(amd64.PSLLQ, 32, tmp1)
+
+	// Execute "pmuludq x2r,x1r", which makes x1r = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit.
+	c.assembler.CompileRegisterToRegister(amd64.PMULUDQ, x2r, x1r)
+
+	// Finally, we get the result by adding x1r and tmp1,
+	// which makes x1r = [(p1_lo*q1_hi+p1_hi*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_hi+p2_hi*q2_lo)<<32+p2_lo*q2_lo]
+	c.assembler.CompileRegisterToRegister(amd64.PADDQ, tmp1, x1r)
+
+	c.locationStack.markRegisterUnused(x2r, tmp1)
+	c.pushVectorRuntimeValueLocationOnRegister(x1r)
+	return nil
+}
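The PMULUDQ/PSRLQ/PSLLQ/PADDQ sequence above is the schoolbook decomposition of a 64-bit multiply modulo 2^64: p*q = ((p_lo*q_hi + p_hi*q_lo) << 32) + p_lo*q_lo, since the p_hi*q_hi term is shifted out entirely. A scalar sketch of the same identity:

```go
// mulI64 mirrors the per-lane math of compileV128MulI64x2 on scalars.
func mulI64(p, q uint64) uint64 {
	pLo, pHi := p&0xffffffff, p>>32
	qLo, qHi := q&0xffffffff, q>>32
	cross := (pLo*qHi + pHi*qLo) << 32 // bits above 63 vanish, as in the vector code
	return cross + pLo*qLo
}
```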
+func (c *amd64Compiler) compileV128NegInt(s wazeroir.Shape) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+
+	result, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	var subInst asm.Instruction
+	switch s {
+	case wazeroir.ShapeI8x16:
+		subInst = amd64.PSUBB
+	case wazeroir.ShapeI16x8:
+		subInst = amd64.PSUBW
+	case wazeroir.ShapeI32x4:
+		subInst = amd64.PSUBD
+	case wazeroir.ShapeI64x2:
+		subInst = amd64.PSUBQ
+	}
+
+	c.assembler.CompileRegisterToRegister(amd64.PXOR, result, result)
+	c.assembler.CompileRegisterToRegister(subInst, v.register, result)
+
+	c.locationStack.markRegisterUnused(v.register)
+	c.pushVectorRuntimeValueLocationOnRegister(result)
+	return nil
+}
+
+// compileV128NegFloat implements compiler.compileV128Neg for float lanes.
+func (c *amd64Compiler) compileV128NegFloat(s wazeroir.Shape) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+
+	tmp, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	var leftShiftInst, xorInst asm.Instruction
+	var leftShiftAmount asm.ConstantValue
+	if s == wazeroir.ShapeF32x4 {
+		leftShiftInst, leftShiftAmount, xorInst = amd64.PSLLD, 31, amd64.XORPS
+	} else {
+		leftShiftInst, leftShiftAmount, xorInst = amd64.PSLLQ, 63, amd64.XORPD
+	}
+
+	// Set all bits on tmp by CMPPD with arg=0 (== the pseudo CMPEQPD instruction).
+	// See https://www.felixcloutier.com/x86/cmpps
+	c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPD, tmp, tmp, 0)
+	// Do the left shift on each lane to set only the most significant bit in each.
+	c.assembler.CompileConstToRegister(leftShiftInst, leftShiftAmount, tmp)
+	// Get the negated result by XOR on each lane with tmp (flipping only the sign bit).
+	c.assembler.CompileRegisterToRegister(xorInst, tmp, v.register)
+
+	c.pushVectorRuntimeValueLocationOnRegister(v.register)
+	return nil
+}
+
+// compileV128Sqrt implements compiler.compileV128Sqrt for amd64.
+func (c *amd64Compiler) compileV128Sqrt(o *wazeroir.OperationV128Sqrt) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeF64x2:
+		inst = amd64.SQRTPD
+	case wazeroir.ShapeF32x4:
+		inst = amd64.SQRTPS
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, v.register, v.register)
+	c.pushVectorRuntimeValueLocationOnRegister(v.register)
+	return nil
+}
+
+// compileV128Abs implements compiler.compileV128Abs for amd64.
+func (c *amd64Compiler) compileV128Abs(o *wazeroir.OperationV128Abs) error {
+	if o.Shape == wazeroir.ShapeI64x2 {
+		return c.compileV128AbsI64x2()
+	}
+
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+
+	result := v.register
+	switch o.Shape {
+	case wazeroir.ShapeI8x16:
+		c.assembler.CompileRegisterToRegister(amd64.PABSB, result, result)
+	case wazeroir.ShapeI16x8:
+		c.assembler.CompileRegisterToRegister(amd64.PABSW, result, result)
+	case wazeroir.ShapeI32x4:
+		c.assembler.CompileRegisterToRegister(amd64.PABSD, result, result)
+	case wazeroir.ShapeF32x4:
+		tmp, err := c.allocateRegister(registerTypeVector)
+		if err != nil {
+			return err
+		}
+		// Set all bits on tmp.
+		c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
+		// Shift each 32-bit lane right by 1 to clear the sign bits.
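+		// (All-ones shifted right by one is 0x7fffffff in each lane: the abs mask.)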
+		c.assembler.CompileConstToRegister(amd64.PSRLD, 1, tmp)
+		// Clear the sign bit of each lane in result.
+		c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmp, result)
+	case wazeroir.ShapeF64x2:
+		tmp, err := c.allocateRegister(registerTypeVector)
+		if err != nil {
+			return err
+		}
+		// Set all bits on tmp.
+		c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
+		// Shift each 64-bit lane right by 1 to clear the sign bits.
+		c.assembler.CompileConstToRegister(amd64.PSRLQ, 1, tmp)
+		// Clear the sign bit of each lane in result.
+		c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmp, result)
+	}
+
+	c.pushVectorRuntimeValueLocationOnRegister(result)
+	return nil
+}
+
+// compileV128AbsI64x2 implements compileV128Abs for i64x2 lanes.
+func (c *amd64Compiler) compileV128AbsI64x2() error {
+	// BLENDVPD implicitly takes its blend mask from XMM0.
+	// See https://www.felixcloutier.com/x86/blendvpd
+	const blendMaskReg = amd64.RegX0
+	c.onValueReleaseRegisterToStack(blendMaskReg)
+	c.locationStack.markRegisterUsed(blendMaskReg)
+
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	if vr == blendMaskReg {
+		return errors.New("BUG: X0 must not be used")
+	}
+
+	tmp, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+	c.locationStack.markRegisterUsed(tmp)
+
+	// Copy the value to tmp.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)
+
+	// Clear all bits on blendMaskReg.
+	c.assembler.CompileRegisterToRegister(amd64.PXOR, blendMaskReg, blendMaskReg)
+	// Subtract vr from blendMaskReg, which makes blendMaskReg = -v.
+	c.assembler.CompileRegisterToRegister(amd64.PSUBQ, vr, blendMaskReg)
+	// Copy the negated value back into vr.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, blendMaskReg, vr)
+
+	// Each lane of vr (holding -v) is replaced by the original value in tmp whenever
+	// the mask lane in X0 (also -v) has its sign bit set, i.e. whenever v was positive,
+	// so vr ends up holding |v|.
+	c.assembler.CompileRegisterToRegister(amd64.BLENDVPD, tmp, vr)
+
+	c.locationStack.markRegisterUnused(blendMaskReg, tmp)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+var (
+	popcntMask = [16]byte{
+		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+	}
+	// popcntTable holds the popcount of each 4-bit index, for example popcntTable[5] holds popcnt(0x05).
+	popcntTable = [16]byte{
+		0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03,
+		0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04,
+	}
+)
+
+// compileV128Popcnt implements compiler.compileV128Popcnt for amd64.
+func (c *amd64Compiler) compileV128Popcnt(*wazeroir.OperationV128Popcnt) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	tmp1, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	c.locationStack.markRegisterUsed(tmp1)
+
+	tmp2, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	c.locationStack.markRegisterUsed(tmp2)
+
+	tmp3, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	// Read the popcntMask into tmp1, and we have
+	// 	tmp1 = [0xf, ..., 0xf]
+	if err := c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, popcntMask[:], tmp1); err != nil {
+		return err
+	}
+
+	// Copy the original value into tmp2.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp2)
+
+	// Given that we have:
+	// 	v = [b1, ..., b16] where bn = hn:ln and hn and ln are the higher and lower 4 bits of bn.
+	//
+	// Take PAND on tmp1 and tmp2, and we have
+	// 	tmp2 = [l1, ..., l16].
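+	// (Per byte, this computes popcnt(bn) = popcntTable[ln] + popcntTable[hn];
+	// the two PSHUFB table lookups below perform these lookups for all 16 bytes at once.)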
+	c.assembler.CompileRegisterToRegister(amd64.PAND, tmp1, tmp2)
+
+	// Do a logical (packed word) right shift by 4 on vr and PAND it with tmp1, meaning that we have
+	// 	vr = [h1, ..., h16].
+	c.assembler.CompileConstToRegister(amd64.PSRLW, 4, vr)
+	c.assembler.CompileRegisterToRegister(amd64.PAND, tmp1, vr)
+
+	// Read the popcntTable into tmp1, and we have
+	// 	tmp1 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
+	if err := c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, popcntTable[:], tmp1); err != nil {
+		return err
+	}
+
+	// Copy tmp1 into tmp3, and we have
+	// 	tmp3 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQU, tmp1, tmp3)
+
+	// tmp3 = [popcnt(l1), ..., popcnt(l16)].
+	c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmp2, tmp3)
+
+	// tmp1 = [popcnt(h1), ..., popcnt(h16)].
+	c.assembler.CompileRegisterToRegister(amd64.PSHUFB, vr, tmp1)
+
+	// vr = tmp1 = [popcnt(h1), ..., popcnt(h16)].
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, tmp1, vr)
+
+	// vr += tmp3 = [popcnt(h1)+popcnt(l1), ..., popcnt(h16)+popcnt(l16)] = [popcnt(b1), ..., popcnt(b16)].
+	c.assembler.CompileRegisterToRegister(amd64.PADDB, tmp3, vr)
+
+	c.locationStack.markRegisterUnused(tmp1, tmp2)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+// compileV128Min implements compiler.compileV128Min for amd64.
+func (c *amd64Compiler) compileV128Min(o *wazeroir.OperationV128Min) error {
+	if o.Shape >= wazeroir.ShapeF32x4 {
+		return c.compileV128MinOrMaxFloat(o.Shape, true)
+	}
+
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeI8x16:
+		if o.Signed {
+			inst = amd64.PMINSB
+		} else {
+			inst = amd64.PMINUB
+		}
+	case wazeroir.ShapeI16x8:
+		if o.Signed {
+			inst = amd64.PMINSW
+		} else {
+			inst = amd64.PMINUW
+		}
+	case wazeroir.ShapeI32x4:
+		if o.Signed {
+			inst = amd64.PMINSD
+		} else {
+			inst = amd64.PMINUD
+		}
+	}
+
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+// compileV128MinOrMaxFloat implements compiler.compileV128Min and compiler.compileV128Max for float lanes.
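+// Unlike Wasm's min/max, x86's MINPS/MINPD/MAXPS/MAXPD do not propagate NaN:
+// they simply return the second (source) operand whenever either operand is NaN,
+// so the NaN lanes are patched up explicitly below.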
+func (c *amd64Compiler) compileV128MinOrMaxFloat(o wazeroir.Shape, isMin bool) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	tmp, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	var minOrMaxInst, cmpInst, andnInst, orInst, logicalRightShiftInst asm.Instruction
+	var shiftNumToInverseNaN asm.ConstantValue
+	if o == wazeroir.ShapeF32x4 {
+		cmpInst, andnInst, orInst, logicalRightShiftInst, shiftNumToInverseNaN =
+			amd64.CMPPS, amd64.ANDNPS, amd64.ORPS, amd64.PSRLD, 0xa
+		if isMin {
+			minOrMaxInst = amd64.MINPS
+		} else {
+			minOrMaxInst = amd64.MAXPS
+		}
+	} else {
+		cmpInst, andnInst, orInst, logicalRightShiftInst, shiftNumToInverseNaN =
+			amd64.CMPPD, amd64.ANDNPD, amd64.ORPD, amd64.PSRLQ, 0xd
+		if isMin {
+			minOrMaxInst = amd64.MINPD
+		} else {
+			minOrMaxInst = amd64.MAXPD
+		}
+	}
+
+	// Copy the value on x1 to tmp.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp)
+
+	// Denote the original vectors in x1r and x2r as v1 and v2 below.
+	//
+	// Execute MINPS/MINPD/MAXPS/MAXPD with destination = tmp (holding v1), and we have
+	// 	tmp = [ if (v1[i] != NaN && v2[i] != NaN) {min_max(v1[i], v2[i])} else {v1[i]} for i in 0..LANE_NUM]
+	c.assembler.CompileRegisterToRegister(minOrMaxInst, x2r, tmp)
+
+	// Execute MINPS/MINPD/MAXPS/MAXPD with destination = x2r (holding v2), and we have
+	// 	x2r = [ if (v1[i] != NaN && v2[i] != NaN) {min_max(v1[i], v2[i])} else {v2[i]} for i in 0..LANE_NUM]
+	c.assembler.CompileRegisterToRegister(minOrMaxInst, x1r, x2r)
+
+	// Copy the current tmp into x1r.
+	c.assembler.CompileRegisterToRegister(amd64.MOVDQA, tmp, x1r)
+
+	// Set all bits on each lane where either v1[i] or v2[i] is NaN via CMPPS/CMPPD (arg=3, "unordered").
+	// That means we have:
+	// 	x1r = [ if (v1[i] != NaN && v2[i] != NaN) {0} else {^0} for i in 0..LANE_NUM]
+	//
+	// See https://www.felixcloutier.com/x86/cmpps.
+	c.assembler.CompileRegisterToRegisterWithArg(cmpInst, x2r, x1r, 3)
+
+	// Mask all the lanes where either v1[i] or v2[i] is NaN, meaning that we have
+	// 	tmp = [ if (v1[i] != NaN && v2[i] != NaN) {min_max(v1[i], v2[i])} else {^0} for i in 0..LANE_NUM]
+	c.assembler.CompileRegisterToRegister(orInst, x1r, tmp)
+
+	// Shift right so that each NaN lane of x1r holds the inverse of a NaN bit pattern,
+	// and each non-NaN lane holds zero. That means we have:
+	// 	x1r = [ if (v1[i] != NaN && v2[i] != NaN) {0} else {^NaN} for i in 0..LANE_NUM]
+	c.assembler.CompileConstToRegister(logicalRightShiftInst, shiftNumToInverseNaN, x1r)
+
+	// Finally, we get the result by putting NaN on each lane where either v1[i] or v2[i] is NaN,
+	// and min_max(v1[i], v2[i]) otherwise. That means we have:
+	// 	x1r = [ if (v1[i] != NaN && v2[i] != NaN) {min_max(v1[i], v2[i])} else {NaN} for i in 0..LANE_NUM]
+	c.assembler.CompileRegisterToRegister(andnInst, tmp, x1r)
+
+	c.locationStack.markRegisterUnused(x2r)
+	c.pushVectorRuntimeValueLocationOnRegister(x1r)
+	return nil
+}
+
+// compileV128Max implements compiler.compileV128Max for amd64.
+func (c *amd64Compiler) compileV128Max(o *wazeroir.OperationV128Max) error {
+	if o.Shape >= wazeroir.ShapeF32x4 {
+		return c.compileV128MinOrMaxFloat(o.Shape, false)
+	}
+
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeI8x16:
+		if o.Signed {
+			inst = amd64.PMAXSB
+		} else {
+			inst = amd64.PMAXUB
+		}
+	case wazeroir.ShapeI16x8:
+		if o.Signed {
+			inst = amd64.PMAXSW
+		} else {
+			inst = amd64.PMAXUW
+		}
+	case wazeroir.ShapeI32x4:
+		if o.Signed {
+			inst = amd64.PMAXSD
+		} else {
+			inst = amd64.PMAXUD
+		}
+	}
+
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+// compileV128AvgrU implements compiler.compileV128AvgrU for amd64.
+func (c *amd64Compiler) compileV128AvgrU(o *wazeroir.OperationV128AvgrU) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	var inst asm.Instruction
+	switch o.Shape {
+	case wazeroir.ShapeI8x16:
+		inst = amd64.PAVGB
+	case wazeroir.ShapeI16x8:
+		inst = amd64.PAVGW
+	}
+
+	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+// compileV128Pmin implements compiler.compileV128Pmin for amd64.
+func (c *amd64Compiler) compileV128Pmin(o *wazeroir.OperationV128Pmin) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	var min asm.Instruction
+	if o.Shape == wazeroir.ShapeF32x4 {
+		min = amd64.MINPS
+	} else {
+		min = amd64.MINPD
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	// Wasm's pmin(x1, x2) is defined as "x2 < x1 ? x2 : x1", which matches
+	// MINPS/MINPD with x2 as the destination.
+	c.assembler.CompileRegisterToRegister(min, x1r, x2r)
+
+	c.locationStack.markRegisterUnused(x1r)
+	c.pushVectorRuntimeValueLocationOnRegister(x2r)
+	return nil
+}
+
+// compileV128Pmax implements compiler.compileV128Pmax for amd64.
+func (c *amd64Compiler) compileV128Pmax(o *wazeroir.OperationV128Pmax) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	var max asm.Instruction
+	if o.Shape == wazeroir.ShapeF32x4 {
+		max = amd64.MAXPS
+	} else {
+		max = amd64.MAXPD
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	c.assembler.CompileRegisterToRegister(max, x1r, x2r)
+
+	c.locationStack.markRegisterUnused(x1r)
+	c.pushVectorRuntimeValueLocationOnRegister(x2r)
+	return nil
+}
+
+// compileV128Ceil implements compiler.compileV128Ceil for amd64.
+func (c *amd64Compiler) compileV128Ceil(o *wazeroir.OperationV128Ceil) error {
+	// See https://www.felixcloutier.com/x86/roundpd
+	const roundModeCeil = 0x2
+	return c.compileV128RoundImpl(o.Shape == wazeroir.ShapeF32x4, roundModeCeil)
+}
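+
+// The ROUNDPS/ROUNDPD immediate selects the rounding mode:
+// 0x0 rounds to nearest (ties to even), 0x1 rounds toward -inf (floor),
+// 0x2 rounds toward +inf (ceil), and 0x3 rounds toward zero (trunc).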
+
+// compileV128Floor implements compiler.compileV128Floor for amd64.
+func (c *amd64Compiler) compileV128Floor(o *wazeroir.OperationV128Floor) error {
+	// See https://www.felixcloutier.com/x86/roundpd
+	const roundModeFloor = 0x1
+	return c.compileV128RoundImpl(o.Shape == wazeroir.ShapeF32x4, roundModeFloor)
+}
+
+// compileV128Trunc implements compiler.compileV128Trunc for amd64.
+func (c *amd64Compiler) compileV128Trunc(o *wazeroir.OperationV128Trunc) error {
+	// See https://www.felixcloutier.com/x86/roundpd
+	const roundModeTrunc = 0x3
+	return c.compileV128RoundImpl(o.Shape == wazeroir.ShapeF32x4, roundModeTrunc)
+}
+
+// compileV128Nearest implements compiler.compileV128Nearest for amd64.
+func (c *amd64Compiler) compileV128Nearest(o *wazeroir.OperationV128Nearest) error {
+	// See https://www.felixcloutier.com/x86/roundpd
+	const roundModeNearest = 0x0
+	return c.compileV128RoundImpl(o.Shape == wazeroir.ShapeF32x4, roundModeNearest)
+}
+
+// compileV128RoundImpl implements compileV128Nearest, compileV128Trunc, compileV128Floor and compileV128Ceil
+// with ROUNDPS (32-bit lanes) and ROUNDPD (64-bit lanes).
+func (c *amd64Compiler) compileV128RoundImpl(is32bit bool, mode byte) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	var round asm.Instruction
+	if is32bit {
+		round = amd64.ROUNDPS
+	} else {
+		round = amd64.ROUNDPD
+	}
+
+	c.assembler.CompileRegisterToRegisterWithArg(round, vr, vr, mode)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+// compileV128Extend implements compiler.compileV128Extend for amd64.
+func (c *amd64Compiler) compileV128Extend(o *wazeroir.OperationV128Extend) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	if !o.UseLow {
+		// We have to shift the higher 64-bits into the lower ones before the actual extending instruction.
+		// Shifting right by 0x8 * 8 = 64 bits and concatenating the register with itself.
+		// See https://www.felixcloutier.com/x86/palignr
+		c.assembler.CompileRegisterToRegisterWithArg(amd64.PALIGNR, vr, vr, 0x8)
+	}
+
+	var extend asm.Instruction
+	switch o.OriginShape {
+	case wazeroir.ShapeI8x16:
+		if o.Signed {
+			extend = amd64.PMOVSXBW
+		} else {
+			extend = amd64.PMOVZXBW
+		}
+	case wazeroir.ShapeI16x8:
+		if o.Signed {
+			extend = amd64.PMOVSXWD
+		} else {
+			extend = amd64.PMOVZXWD
+		}
+	case wazeroir.ShapeI32x4:
+		if o.Signed {
+			extend = amd64.PMOVSXDQ
+		} else {
+			extend = amd64.PMOVZXDQ
+		}
+	}
+
+	c.assembler.CompileRegisterToRegister(extend, vr, vr)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+// compileV128ExtMul implements compiler.compileV128ExtMul for amd64.
+func (c *amd64Compiler) compileV128ExtMul(o *wazeroir.OperationV128ExtMul) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	switch o.OriginShape {
+	case wazeroir.ShapeI8x16:
+		if !o.UseLow {
+			// We have to shift the higher 64-bits into the lower ones before the actual extending instruction.
+			// Shifting right by 0x8 * 8 = 64 bits and concatenating the register with itself.
+			// See https://www.felixcloutier.com/x86/palignr
+			c.assembler.CompileRegisterToRegisterWithArg(amd64.PALIGNR, x1r, x1r, 0x8)
+			c.assembler.CompileRegisterToRegisterWithArg(amd64.PALIGNR, x2r, x2r, 0x8)
+		}
+
+		var ext asm.Instruction
+		if o.Signed {
+			ext = amd64.PMOVSXBW
+		} else {
+			ext = amd64.PMOVZXBW
+		}
+
+		// Sign- or zero-extend the lower half packed bytes to packed words.
+		c.assembler.CompileRegisterToRegister(ext, x1r, x1r)
+		c.assembler.CompileRegisterToRegister(ext, x2r, x2r)
+
+		c.assembler.CompileRegisterToRegister(amd64.PMULLW, x2r, x1r)
+	case wazeroir.ShapeI16x8:
+		tmp, err := c.allocateRegister(registerTypeVector)
+		if err != nil {
+			return err
+		}
+
+		// Copy the value on x1r to tmp.
+		c.assembler.CompileRegisterToRegister(amd64.MOVDQA, x1r, tmp)
+
+		// Multiply the values and store the lower 16-bits into x1r.
+		c.assembler.CompileRegisterToRegister(amd64.PMULLW, x2r, x1r)
+		if o.Signed {
+			// Signed multiply the values and store the higher 16-bits into tmp.
+			c.assembler.CompileRegisterToRegister(amd64.PMULHW, x2r, tmp)
+		} else {
+			// Unsigned multiply the values and store the higher 16-bits into tmp.
+			c.assembler.CompileRegisterToRegister(amd64.PMULHUW, x2r, tmp)
+		}
+
+		// Unpack the lower or higher half of the vectors (tmp and x1r) and interleave them,
+		// reassembling the full 32-bit products.
+		if o.UseLow {
+			c.assembler.CompileRegisterToRegister(amd64.PUNPCKLWD, tmp, x1r)
+		} else {
+			c.assembler.CompileRegisterToRegister(amd64.PUNPCKHWD, tmp, x1r)
+		}
+	case wazeroir.ShapeI32x4:
+		var shuffleOrder byte
+		// Given that the original state of the register is [v1, v2, v3, v4] where each vN is a 32-bit (double word) lane,
+		if o.UseLow {
+			// this makes the register [v1, v1, v2, v2]
+			shuffleOrder = 0b01010000
+		} else {
+			// this makes the register [v3, v3, v4, v4]
+			shuffleOrder = 0b11111010
+		}
+		// See https://www.felixcloutier.com/x86/pshufd
+		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, x1r, x1r, shuffleOrder)
+		c.assembler.CompileRegisterToRegisterWithArg(amd64.PSHUFD, x2r, x2r, shuffleOrder)
+
+		var mul asm.Instruction
+		if o.Signed {
+			mul = amd64.PMULDQ
+		} else {
+			mul = amd64.PMULUDQ
+		}
+		c.assembler.CompileRegisterToRegister(mul, x2r, x1r)
+	}
+
+	c.locationStack.markRegisterUnused(x2r)
+	c.pushVectorRuntimeValueLocationOnRegister(x1r)
+	return nil
+}
+
+// q15mulrSatSMask holds 0x8000 in each of the eight 16-bit lanes.
+var q15mulrSatSMask = [16]byte{
+	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+}
+
+// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for amd64.
+func (c *amd64Compiler) compileV128Q15mulrSatS(*wazeroir.OperationV128Q15mulrSatS) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	tmp, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	// See https://github.com/WebAssembly/simd/pull/365 for the following logic.
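+	// PMULHRSW computes ((x*y >> 14) + 1) >> 1 per lane, which matches Wasm's
+	// q15mulr_sat_s everywhere except x = y = -32768 (0x8000), where it yields
+	// 0x8000 instead of the saturated 0x7fff. The PCMPEQW/PXOR pair below flips
+	// exactly those lanes to 0x7fff.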
+	if err := c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, q15mulrSatSMask[:], tmp); err != nil {
+		return err
+	}
+
+	// x1r = [q15mulr(x1, x2) per lane], with overflowed lanes holding 0x8000.
+	c.assembler.CompileRegisterToRegister(amd64.PMULHRSW, x2r, x1r)
+	// tmp = [0xffff if the lane overflowed to 0x8000, 0 otherwise].
+	c.assembler.CompileRegisterToRegister(amd64.PCMPEQW, x1r, tmp)
+	// Flip the overflowed lanes from 0x8000 to the saturated 0x7fff.
+	c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, x1r)
+
+	c.locationStack.markRegisterUnused(x2r)
+	c.pushVectorRuntimeValueLocationOnRegister(x1r)
+	return nil
+}
+
+var (
+	// allOnesI8x16 holds 0x01 in each byte lane.
+	allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
+	// allOnesI16x8 holds 0x0001 in each 16-bit lane.
+	allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
+
+	// extAddPairwiseI16x8uMask's first 16 bytes hold 0x8000 in each 16-bit lane (the sign-flip mask),
+	// and its last 16 bytes hold 0x00010000 in each 32-bit lane.
+	extAddPairwiseI16x8uMask = [16 * 2]byte{
+		0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+		0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
+	}
+)
+
+// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for amd64.
+func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAddPairwise) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	switch o.OriginShape {
+	case wazeroir.ShapeI8x16:
+		allOnesReg, err := c.allocateRegister(registerTypeVector)
+		if err != nil {
+			return err
+		}
+
+		if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU,
+			allOnesI8x16[:], allOnesReg); err != nil {
+			return err
+		}
+
+		var result asm.Register
+		// See https://www.felixcloutier.com/x86/pmaddubsw for detail.
+		if o.Signed {
+			// Interpret vr's bytes as signed, multiply them by one and add adjacent
+			// products pairwise, which results in a pairwise signed extadd.
+			c.assembler.CompileRegisterToRegister(amd64.PMADDUBSW, vr, allOnesReg)
+			result = allOnesReg
+		} else {
+			// Interpret allOnesReg (all ones) as the signed-byte operand, so vr's bytes
+			// are treated as unsigned and the multiply-add is an unsigned extadd.
+			c.assembler.CompileRegisterToRegister(amd64.PMADDUBSW, allOnesReg, vr)
+			result = vr
+		}
+
+		if result != vr {
+			c.locationStack.markRegisterUnused(vr)
+		}
+		c.pushVectorRuntimeValueLocationOnRegister(result)
+	case wazeroir.ShapeI16x8:
+		tmp, err := c.allocateRegister(registerTypeVector)
+		if err != nil {
+			return err
+		}
+
+		if o.Signed {
+			// See https://www.felixcloutier.com/x86/pmaddwd
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, allOnesI16x8[:], tmp); err != nil {
+				return err
+			}
+
+			c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
+			c.pushVectorRuntimeValueLocationOnRegister(vr)
+		} else {
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, extAddPairwiseI16x8uMask[:16], tmp); err != nil {
+				return err
+			}
+
+			// Flip the sign bits on vr.
+			//
+			// Assuming that vr = [w1, ..., w8], we now have
+			// 	vr[i] = int16(wi - 0x8000) for i = 1..8
+			// since flipping the sign bit subtracts 0x8000 in two's complement.
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, vr)
+
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, allOnesI16x8[:], tmp); err != nil {
+				return err
+			}
+
+			// For i = 1..4 (as this results in i32x4 lanes), we now have
+			// 	vr[i] = int32((wn - 0x8000) + (w(n+1) - 0x8000)) = int32(wn + w(n+1) - 0x10000)
+			c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
+
+			// tmp[i] = [0x00, 0x00, 0x01, 0x00] = int32(0x10000) (= math.MaxUint16+1)
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, extAddPairwiseI16x8uMask[16:], tmp); err != nil {
+				return err
+			}
+
+			// vr[i] = int32(wn + w(n+1) - 0x10000) + 0x10000 = int32(wn + w(n+1)) = uint32(wn + w(n+1)).
+			c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr)
+			c.pushVectorRuntimeValueLocationOnRegister(vr)
+		}
+	}
+	return nil
+}
+
+// compileV128FloatPromote implements compiler.compileV128FloatPromote for amd64.
+func (c *amd64Compiler) compileV128FloatPromote(*wazeroir.OperationV128FloatPromote) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	c.assembler.CompileRegisterToRegister(amd64.CVTPS2PD, vr, vr)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+// compileV128FloatDemote implements compiler.compileV128FloatDemote for amd64.
+func (c *amd64Compiler) compileV128FloatDemote(*wazeroir.OperationV128FloatDemote) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	c.assembler.CompileRegisterToRegister(amd64.CVTPD2PS, vr, vr)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+// compileV128Dot implements compiler.compileV128Dot for amd64.
+func (c *amd64Compiler) compileV128Dot(*wazeroir.OperationV128Dot) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	// PMADDWD's multiply-and-add of adjacent signed 16-bit lanes is exactly i32x4.dot_i16x8_s.
+	c.assembler.CompileRegisterToRegister(amd64.PMADDWD, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+// fConvertFromIMask holds the pattern 0x43300000 (the high double word of float64 0x1.0p52)
+// in its first two 32-bit lanes.
+var fConvertFromIMask = [16]byte{
+	0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+}
+
+// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for amd64.
+func (c *amd64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConvertFromI) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	switch o.DestinationShape {
+	case wazeroir.ShapeF32x4:
+		if o.Signed {
+			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, vr, vr)
+		} else {
+			tmp, err := c.allocateRegister(registerTypeVector)
+			if err != nil {
+				return err
+			}
+
+			// Copy the value into tmp.
+			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)
+
+			// Clear the higher 10 bits of each lane in tmp, keeping the lower 22 bits.
+			c.assembler.CompileConstToRegister(amd64.PSLLD, 0xa, tmp)
+			c.assembler.CompileConstToRegister(amd64.PSRLD, 0xa, tmp)
+
+			// Subtract tmp (the lower 22 bits) from vr == clear the lower 22 bits of vr.
+			c.assembler.CompileRegisterToRegister(amd64.PSUBD, tmp, vr)
+
+			// Convert the lower 22 bits in tmp (exact, as they fit in float32's 24-bit mantissa).
+			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, tmp, tmp)
+
+			// Shift right by one and convert vr, giving the halved conversion result of the higher bits
+			// (halving first keeps the value within the signed conversion range).
+			c.assembler.CompileConstToRegister(amd64.PSRLD, 1, vr)
+			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, vr, vr)
+
+			// Double the converted halved higher bits.
+			c.assembler.CompileRegisterToRegister(amd64.ADDPS, vr, vr)
+
+			// Get the conversion result by adding tmp (holding the lower 22-bit conversion) into vr.
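+			// (In scalar terms, this computes float32(x) as
+			// 	float32(x & 0x3fffff) + 2*float32((x &^ 0x3fffff) >> 1)
+			// with a single rounding at this final addition.)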
+			c.assembler.CompileRegisterToRegister(amd64.ADDPS, tmp, vr)
+		}
+	case wazeroir.ShapeF64x2:
+		if o.Signed {
+			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PD, vr, vr)
+		} else {
+			tmp, err := c.allocateRegister(registerTypeVector)
+			if err != nil {
+				return err
+			}
+
+			// tmp = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, fConvertFromIMask[:16], tmp); err != nil {
+				return err
+			}
+
+			// Given that we have vr = [d1, d2, d3, d4], this results in
+			// 	vr = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
+			// 	   = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
+			// 	     ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
+			c.assembler.CompileRegisterToRegister(amd64.UNPCKLPS, tmp, vr)
+
+			// tmp = [float64(0x1.0p52), float64(0x1.0p52)]
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, twop52[:], tmp); err != nil {
+				return err
+			}
+
+			// Now we get the result as
+			// 	vr = [float64(uint32(d1)), float64(uint32(d2))]
+			// because the following always holds exactly (a uint32 fits in the 52-bit mantissa):
+			// 	(0x1.0p52 + float64(uint32(d))) - 0x1.0p52 = float64(uint32(d))
+			c.assembler.CompileRegisterToRegister(amd64.SUBPD, tmp, vr)
+		}
+	}
+
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
+
+// compileV128Narrow implements compiler.compileV128Narrow for amd64.
+func (c *amd64Compiler) compileV128Narrow(o *wazeroir.OperationV128Narrow) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	var narrow asm.Instruction
+	switch o.OriginShape {
+	case wazeroir.ShapeI16x8:
+		if o.Signed {
+			narrow = amd64.PACKSSWB
+		} else {
+			narrow = amd64.PACKUSWB
+		}
+	case wazeroir.ShapeI32x4:
+		if o.Signed {
+			narrow = amd64.PACKSSDW
+		} else {
+			narrow = amd64.PACKUSDW
+		}
+	}
+	c.assembler.CompileRegisterToRegister(narrow, x2.register, x1.register)
+
+	c.locationStack.markRegisterUnused(x2.register)
+	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
+	return nil
+}
+
+var (
+	// i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes.
+	i32sMaxOnF64x2 = [16]byte{
+		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
+		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
+	}
+
+	// i32uMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes.
+	i32uMaxOnF64x2 = [16]byte{
+		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
+		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
+	}
+
+	// twop52 holds two float64(0x1.0p52) on two f64 lanes. 0x1.0p52 is special in the sense that
+	// with this exponent, the low 32 bits of the mantissa represent a corresponding uint32 number
+	// exactly, so additions and subtractions of such small integers keep exactly the same 32-bit
+	// integer bit representation in the mantissa.
+	//
+	// Note: the name twop52 is common across various compiler ecosystems.
+	// 	E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28
+	// 	E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html
+	twop52 = [16]byte{
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
+	}
+)
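+
+// For reference, the scalar form of the twop52 trick (illustrative only, not part of this patch):
+//
+//	func f64FromU32(x uint32) float64 {
+//		// OR-ing x into the low mantissa bits of 0x1.0p52 (bit pattern 0x4330000000000000)
+//		// yields exactly 0x1.0p52 + float64(x), so subtracting 0x1.0p52 recovers float64(x)
+//		// without any rounding.
+//		return math.Float64frombits(0x4330000000000000|uint64(x)) - 0x1p52
+//	}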
+
+// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for amd64.
+func (c *amd64Compiler) compileV128ITruncSatFromF(o *wazeroir.OperationV128ITruncSatFromF) error {
+	v := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(v); err != nil {
+		return err
+	}
+	vr := v.register
+
+	tmp, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	c.locationStack.markRegisterUsed(tmp)
+
+	switch o.OriginShape {
+	case wazeroir.ShapeF32x4:
+		if o.Signed {
+			// Copy the value into tmp.
+			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)
+
+			// Assuming we have vr = [v1, v2, v3, v4].
+			//
+			// Set all bits on tmp if the lane is not NaN.
+			// 	tmp[i] = 0xffffffff if vi != NaN
+			// 	       = 0          if vi == NaN
+			c.assembler.CompileRegisterToRegister(amd64.CMPEQPS, tmp, tmp)
+
+			// Clear the NaN lanes on vr, meaning that
+			// 	vr[i] = vi if vi != NaN
+			// 	      = 0  if vi == NaN
+			c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmp, vr)
+
+			// 	tmp[i] = ^vi if vi != NaN
+			// 	       = 0   if vi == NaN
+			// which means that tmp[i] & 0x80000000 != 0 if and only if vi's sign bit is clear (vi is positive or +0).
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, vr, tmp)
+
+			// 	vr[i] = int32(vi)  if vi != NaN and vr is not overflowing
+			// 	      = 0x80000000 if vi != NaN and vr is overflowing (see https://www.felixcloutier.com/x86/cvttps2dq)
+			// 	      = 0          if vi == NaN
+			c.assembler.CompileRegisterToRegister(amd64.CVTTPS2DQ, vr, vr)
+
+			// Below, we have to convert 0x80000000 into 0x7FFFFFFF for the positive overflowing lanes.
+			//
+			// After this, tmp[i] & 0x80000000 != 0 if and only if vi is positive and overflowed the conversion.
+			c.assembler.CompileRegisterToRegister(amd64.PAND, vr, tmp)
+
+			// Arithmetic right shift tmp by 31, meaning that we have
+			// 	tmp[i] = 0xffffffff if vi is a positive overflow, 0 otherwise.
+			c.assembler.CompileConstToRegister(amd64.PSRAD, 0x1f, tmp)
+
+			// Flip 0x80000000 into 0x7fffffff on the positive overflowing lanes, keeping the rest intact.
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, vr)
+		} else {
+			tmp2, err := c.allocateRegister(registerTypeVector)
+			if err != nil {
+				return err
+			}
+
+			// See https://github.com/bytecodealliance/wasmtime/pull/2440
+			// Note: even v8 doesn't seem to support i32x4.trunc_sat_f32x4_u natively.
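+			// The sequence below clamps negative and NaN lanes to zero via MAXPS with zero,
+			// truncates lanes below 2^31 directly, handles lanes in [2^31, 2^32) by
+			// truncating (v - 2^31) and adding the 0x80000000 bias back, and saturates
+			// lanes at or above 2^32 to 0xffffffff.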
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
+			c.assembler.CompileRegisterToRegister(amd64.MAXPS, tmp, vr)
+			c.assembler.CompileRegisterToRegister(amd64.PCMPEQD, tmp, tmp)
+			c.assembler.CompileConstToRegister(amd64.PSRLD, 0x1, tmp)
+			c.assembler.CompileRegisterToRegister(amd64.CVTDQ2PS, tmp, tmp)
+			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp2)
+			c.assembler.CompileRegisterToRegister(amd64.CVTTPS2DQ, vr, vr)
+			c.assembler.CompileRegisterToRegister(amd64.SUBPS, tmp, tmp2)
+			c.assembler.CompileRegisterToRegisterWithArg(amd64.CMPPS, tmp2, tmp, 0x2) // == CMPLEPS
+			c.assembler.CompileRegisterToRegister(amd64.CVTTPS2DQ, tmp2, tmp2)
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp2)
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
+			c.assembler.CompileRegisterToRegister(amd64.PMAXSD, tmp, tmp2)
+			c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp2, vr)
+		}
+	case wazeroir.ShapeF64x2:
+		tmp2, err := c.allocateRegister(registerTypeVector)
+		if err != nil {
+			return err
+		}
+
+		if o.Signed {
+			// Copy the value into tmp.
+			c.assembler.CompileRegisterToRegister(amd64.MOVDQA, vr, tmp)
+
+			// Set all bits for the non-NaN lanes, zeros otherwise.
+			// I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise.
+			c.assembler.CompileRegisterToRegister(amd64.CMPEQPD, tmp, tmp)
+
+			// Load 2147483647.0 into each lane of tmp2.
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVUPD, i32sMaxOnF64x2[:], tmp2); err != nil {
+				return err
+			}
+
+			// tmp[i] = 2147483647.0 if vi != NaN, 0 otherwise.
+			c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmp2, tmp)
+
+			// MINPD returns the source operand whenever either input is NaN, so we have
+			// 	vr[i] = min(vi, 2147483647.0) if vi != NaN
+			// 	      = 0                     if vi == NaN
+			c.assembler.CompileRegisterToRegister(amd64.MINPD, tmp, vr)
+
+			c.assembler.CompileRegisterToRegister(amd64.CVTTPD2DQ, vr, vr)
+		} else {
+			// Clear all bits on tmp.
+			c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, tmp)
+
+			// 	vr[i] = vi if vi != NaN && vi > 0
+			// 	      = 0  if vi == NaN || vi <= 0
+			c.assembler.CompileRegisterToRegister(amd64.MAXPD, tmp, vr)
+
+			// tmp2[i] = float64(math.MaxUint32) = 4294967295.0
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVUPD, i32uMaxOnF64x2[:], tmp2); err != nil {
+				return err
+			}
+
+			// 	vr[i] = min(vi, 4294967295.0) if vi != NaN && vi > 0
+			// 	      = 0                      if vi == NaN || vi <= 0
+			c.assembler.CompileRegisterToRegister(amd64.MINPD, tmp2, vr)
+
+			// Truncate the floating point values (round toward zero).
+			c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDPD, vr, vr, 0x3)
+
+			// tmp2[i] = float64(0x1.0p52)
+			if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVUPD, twop52[:], tmp2); err != nil {
+				return err
+			}
+
+			// 	vr[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && 0 < vi <= math.MaxUint32
+			// 	      = float64(0x1.0p52)                        otherwise (its lower 32 mantissa bits are zero)
+			//
+			// This means that vr[i] holds exactly the same bits as uint32(vi) in its lower 32-bits.
+			c.assembler.CompileRegisterToRegister(amd64.ADDPD, tmp2, vr)
+
+			// At this point, viewed as 32-bit lanes, we have
+			// 	vr = [uint32(v0), 0x43300000, uint32(v1), 0x43300000]
+			// 	tmp = [0, 0, 0, 0]
+			// Therefore, SHUFPS with 0b00_00_10_00 results in
+			// 	vr = [vr[00], vr[10], tmp[00], tmp[00]] = [vr[00], vr[10], 0, 0]
+			// meaning that for i = 0 and 1, we have
+			// 	vr[i] = uint32(min(vi, math.MaxUint32)) if vi != NaN && vi > 0
+			// 	      = 0 otherwise.
+			c.assembler.CompileRegisterToRegisterWithArg(amd64.SHUFPS, tmp, vr, 0b00_00_10_00)
+		}
+	}
+
+	c.locationStack.markRegisterUnused(tmp)
+	c.pushVectorRuntimeValueLocationOnRegister(vr)
+	return nil
+}
diff --git a/internal/engine/compiler/impl_vec_arm64.go b/internal/engine/compiler/impl_vec_arm64.go
index ee1d368cdd9..c406972ceb2 100644
--- a/internal/engine/compiler/impl_vec_arm64.go
+++ b/internal/engine/compiler/impl_vec_arm64.go
@@ -135,7 +135,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 	}
 
 	switch o.Type {
-	case wazeroir.LoadV128Type128:
+	case wazeroir.V128LoadType128:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 16)
 		if err != nil {
 			return err
@@ -143,7 +143,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
 			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementQ,
 		)
-	case wazeroir.LoadV128Type8x8s:
+	case wazeroir.V128LoadType8x8s:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
@@ -153,7 +153,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		)
 		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
 			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
-	case wazeroir.LoadV128Type8x8u:
+	case wazeroir.V128LoadType8x8u:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
@@ -163,7 +163,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		)
 		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
 			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
-	case wazeroir.LoadV128Type16x4s:
+	case wazeroir.V128LoadType16x4s:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
@@ -173,7 +173,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		)
 		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
 			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
-	case wazeroir.LoadV128Type16x4u:
+	case wazeroir.V128LoadType16x4u:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
@@ -183,7 +183,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		)
 		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
 			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
-	case wazeroir.LoadV128Type32x2s:
+	case wazeroir.V128LoadType32x2s:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
@@ -193,7 +193,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		)
 		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
 			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
-	case wazeroir.LoadV128Type32x2u:
+	case wazeroir.V128LoadType32x2u:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
@@ -203,35 +203,35 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		)
 		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
 			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
-	case wazeroir.LoadV128Type8Splat:
+	case wazeroir.V128LoadType8Splat:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 1)
 		if err != nil {
 			return err
 		}
 		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
 		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement16B)
-	case wazeroir.LoadV128Type16Splat:
+	case wazeroir.V128LoadType16Splat:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 2)
 		if err != nil {
 			return err
 		}
 		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
 		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement8H)
-	case wazeroir.LoadV128Type32Splat:
+	case wazeroir.V128LoadType32Splat:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 4)
 		if err != nil {
 			return err
 		}
 		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
 		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement4S)
-	case wazeroir.LoadV128Type64Splat:
+	case wazeroir.V128LoadType64Splat:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
 		if err != nil {
 			return err
 		}
 		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
 		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement2D)
-	case wazeroir.LoadV128Type32zero:
+	case wazeroir.V128LoadType32zero:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 16)
 		if err != nil {
 			return err
@@ -239,7 +239,7 @@ func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err erro
 		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
 			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementS,
 		)
-	case wazeroir.LoadV128Type64zero:
+	case wazeroir.V128LoadType64zero:
 		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 16)
 		if err != nil {
 			return err
@@ -702,3 +702,138 @@ func (c *arm64Compiler) compileV128Shl(o *wazeroir.OperationV128Shl) error {
 func (c *arm64Compiler) compileV128Cmp(o *wazeroir.OperationV128Cmp) error {
 	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
 }
+
+// compileV128AddSat implements compiler.compileV128AddSat for arm64.
+func (c *arm64Compiler) compileV128AddSat(o *wazeroir.OperationV128AddSat) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128SubSat implements compiler.compileV128SubSat for arm64.
+func (c *arm64Compiler) compileV128SubSat(o *wazeroir.OperationV128SubSat) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Mul implements compiler.compileV128Mul for arm64.
+func (c *arm64Compiler) compileV128Mul(o *wazeroir.OperationV128Mul) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Div implements compiler.compileV128Div for arm64.
+func (c *arm64Compiler) compileV128Div(o *wazeroir.OperationV128Div) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Neg implements compiler.compileV128Neg for arm64.
+func (c *arm64Compiler) compileV128Neg(o *wazeroir.OperationV128Neg) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Sqrt implements compiler.compileV128Sqrt for arm64.
+func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.OperationV128Sqrt) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Abs implements compiler.compileV128Abs for arm64.
+func (c *arm64Compiler) compileV128Abs(o *wazeroir.OperationV128Abs) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Popcnt implements compiler.compileV128Popcnt for arm64.
+func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.OperationV128Popcnt) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Min implements compiler.compileV128Min for arm64.
+func (c *arm64Compiler) compileV128Min(o *wazeroir.OperationV128Min) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Max implements compiler.compileV128Max for arm64.
+func (c *arm64Compiler) compileV128Max(o *wazeroir.OperationV128Max) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128AvgrU implements compiler.compileV128AvgrU for arm64.
+func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.OperationV128AvgrU) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Pmin implements compiler.compileV128Pmin for arm64.
+func (c *arm64Compiler) compileV128Pmin(o *wazeroir.OperationV128Pmin) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Pmax implements compiler.compileV128Pmax for arm64.
+func (c *arm64Compiler) compileV128Pmax(o *wazeroir.OperationV128Pmax) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Ceil implements compiler.compileV128Ceil for arm64.
+func (c *arm64Compiler) compileV128Ceil(o *wazeroir.OperationV128Ceil) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Floor implements compiler.compileV128Floor for arm64.
+func (c *arm64Compiler) compileV128Floor(o *wazeroir.OperationV128Floor) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Trunc implements compiler.compileV128Trunc for arm64.
+func (c *arm64Compiler) compileV128Trunc(o *wazeroir.OperationV128Trunc) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Nearest implements compiler.compileV128Nearest for arm64.
+func (c *arm64Compiler) compileV128Nearest(o *wazeroir.OperationV128Nearest) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Extend implements compiler.compileV128Extend for arm64.
+func (c *arm64Compiler) compileV128Extend(o *wazeroir.OperationV128Extend) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128ExtMul implements compiler.compileV128ExtMul for arm64.
+func (c *arm64Compiler) compileV128ExtMul(o *wazeroir.OperationV128ExtMul) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for arm64.
+func (c *arm64Compiler) compileV128Q15mulrSatS(o *wazeroir.OperationV128Q15mulrSatS) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for arm64.
+func (c *arm64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAddPairwise) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128FloatPromote implements compiler.compileV128FloatPromote for arm64.
+func (c *arm64Compiler) compileV128FloatPromote(o *wazeroir.OperationV128FloatPromote) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128FloatDemote implements compiler.compileV128FloatDemote for arm64.
+func (c *arm64Compiler) compileV128FloatDemote(o *wazeroir.OperationV128FloatDemote) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for arm64.
+func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConvertFromI) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Dot implements compiler.compileV128Dot for arm64.
+func (c *arm64Compiler) compileV128Dot(o *wazeroir.OperationV128Dot) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128Narrow implements compiler.compileV128Narrow for arm64.
+func (c *arm64Compiler) compileV128Narrow(o *wazeroir.OperationV128Narrow) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
+
+// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for arm64.
+func (c *arm64Compiler) compileV128ITruncSatFromF(o *wazeroir.OperationV128ITruncSatFromF) error {
+	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+}
diff --git a/internal/engine/interpreter/interpreter.go b/internal/engine/interpreter/interpreter.go
index 75dfd41a33b..38b800f08da 100644
--- a/internal/engine/interpreter/interpreter.go
+++ b/internal/engine/interpreter/interpreter.go
@@ -90,7 +90,7 @@ type callEngine struct {
 	frames []*callFrame
 }
 
-func (me *moduleEngine) newCallEngine() *callEngine {
+func (e *moduleEngine) newCallEngine() *callEngine {
 	return &callEngine{}
 }
 
@@ -276,9 +276,9 @@ func (e *engine) NewModuleEngine(name string, module *wasm.Module, importedFunct
 			return me, wasm.ErrElementOffsetOutOfBounds
 		}
 
-		for i, funcindex := range init.FunctionIndexes {
-			if funcindex != nil {
-				references[init.Offset+uint32(i)] = uintptr(unsafe.Pointer(me.functions[*funcindex]))
+		for i, fnIndex := range init.FunctionIndexes {
+			if fnIndex != nil {
+				references[init.Offset+uint32(i)] = uintptr(unsafe.Pointer(me.functions[*fnIndex]))
 			}
 		}
 	}
@@ -430,12 +430,10 @@ func (e *engine) lowerIR(ir *wazeroir.CompilationResult) (*code, error) {
 			op.us[0] = uint64(o.Arg.Alignment)
 			op.us[1] = uint64(o.Arg.Offset)
 		case *wazeroir.OperationStore8:
-			op.b1 = byte(o.Type)
 			op.us = make([]uint64, 2)
 			op.us[0] = uint64(o.Arg.Alignment)
 			op.us[1] = uint64(o.Arg.Offset)
 		case *wazeroir.OperationStore16:
-			op.b1 = byte(o.Type)
 			op.us = make([]uint64, 2)
 			op.us[0] = uint64(o.Arg.Alignment)
 			op.us[1] = uint64(o.Arg.Offset)
@@ -536,7 +534,7 @@ func (e *engine) lowerIR(ir *wazeroir.CompilationResult) (*code, error) {
 			*wazeroir.OperationF32ReinterpretFromI32,
 			*wazeroir.OperationF64ReinterpretFromI64:
 			// Reinterpret ops are essentially nop for engine mode
-			// because we treat all values as uint64, and the reinterpret is only used at module
+			// because we treat all values as uint64, and Reinterpret* is only used at module
 			// validation phase where we check type soundness of all the operations.
 			// So just eliminate the ops.
 			continue
@@ -645,6 +643,71 @@ func (e *engine) lowerIR(ir *wazeroir.CompilationResult) (*code, error) {
 			op.b1 = o.Shape
 		case *wazeroir.OperationV128Cmp:
 			op.b1 = o.Type
+		case *wazeroir.OperationV128AddSat:
+			op.b1 = o.Shape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128SubSat:
+			op.b1 = o.Shape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128Mul:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Div:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Neg:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Sqrt:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Abs:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Popcnt:
+		case *wazeroir.OperationV128Min:
+			op.b1 = o.Shape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128Max:
+			op.b1 = o.Shape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128AvgrU:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Pmin:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Pmax:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Ceil:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Floor:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Trunc:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Nearest:
+			op.b1 = o.Shape
+		case *wazeroir.OperationV128Extend:
+			op.b1 = o.OriginShape
+			if o.Signed {
+				op.b2 = 1
+			}
+			op.b3 = o.UseLow
+		case *wazeroir.OperationV128ExtMul:
+			op.b1 = o.OriginShape
+			if o.Signed {
+				op.b2 = 1
+			}
+			op.b3 = o.UseLow
+		case *wazeroir.OperationV128Q15mulrSatS:
+		case *wazeroir.OperationV128ExtAddPairwise:
+			op.b1 = o.OriginShape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128FloatPromote:
+		case *wazeroir.OperationV128FloatDemote:
+		case *wazeroir.OperationV128FConvertFromI:
+			op.b1 = o.DestinationShape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128Dot:
+		case *wazeroir.OperationV128Narrow:
+			op.b1 = o.OriginShape
+			op.b3 = o.Signed
+		case *wazeroir.OperationV128ITruncSatFromF:
+			op.b1 = o.OriginShape
+			op.b3 = o.Signed
 		default:
 			panic(fmt.Errorf("BUG: unimplemented operation %s", op.kind.String()))
 		}
@@ -662,16 +725,16 @@ func (e *engine) lowerIR(ir *wazeroir.CompilationResult) (*code, error) {
 }
 
 // Name implements the same method as documented on wasm.ModuleEngine.
-func (me *moduleEngine) Name() string {
-	return me.name
+func (e *moduleEngine) Name() string {
+	return e.name
 }
 
 // CreateFuncElementInstance implements the same method as documented on wasm.ModuleEngine.
-func (me *moduleEngine) CreateFuncElementInstance(indexes []*wasm.Index) *wasm.ElementInstance {
+func (e *moduleEngine) CreateFuncElementInstance(indexes []*wasm.Index) *wasm.ElementInstance {
 	refs := make([]wasm.Reference, len(indexes))
 	for i, index := range indexes {
 		if index != nil {
-			refs[i] = uintptr(unsafe.Pointer(me.functions[*index]))
+			refs[i] = uintptr(unsafe.Pointer(e.functions[*index]))
 		}
 	}
 	return &wasm.ElementInstance{
@@ -681,27 +744,27 @@ func (me *moduleEngine) CreateFuncElementInstance(indexes []*wasm.Index) *wasm.E
 }
 
 // InitializeFuncrefGlobals implements the same method as documented on wasm.InitializeFuncrefGlobals.
-func (me *moduleEngine) InitializeFuncrefGlobals(globals []*wasm.GlobalInstance) {
+func (e *moduleEngine) InitializeFuncrefGlobals(globals []*wasm.GlobalInstance) {
 	for _, g := range globals {
 		if g.Type.ValType == wasm.ValueTypeFuncref {
 			if int64(g.Val) == wasm.GlobalInstanceNullFuncRefValue {
 				g.Val = 0 // Null funcref is expressed as zero.
 			} else {
 				// Lowers the stored function index into the interpreter specific function's opaque pointer.
-				g.Val = uint64(uintptr(unsafe.Pointer(me.functions[g.Val])))
+				g.Val = uint64(uintptr(unsafe.Pointer(e.functions[g.Val])))
 			}
 		}
 	}
 }
 
 // Call implements the same method as documented on wasm.ModuleEngine.
-func (me *moduleEngine) Call(ctx context.Context, m *wasm.CallContext, f *wasm.FunctionInstance, params ...uint64) (results []uint64, err error) {
+func (e *moduleEngine) Call(ctx context.Context, m *wasm.CallContext, f *wasm.FunctionInstance, params ...uint64) (results []uint64, err error) {
 	// Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to
 	// code on close aren't locked, neither is this read.
-	compiled := me.functions[f.Idx]
+	compiled := e.functions[f.Idx]
 	if compiled == nil {
 		// Lazy check the cause as it could be because the module was already closed.
 		if err = m.FailIfClosed(); err == nil {
-			panic(fmt.Errorf("BUG: %s.codes[%d] was nil before close", me.name, f.Idx))
+			panic(fmt.Errorf("BUG: %s.codes[%d] was nil before close", e.name, f.Idx))
 		}
 		return
 	}
@@ -712,7 +775,7 @@ func (me *moduleEngine) Call(ctx context.Context, m *wasm.CallContext, f *wasm.F
 		return nil, fmt.Errorf("expected %d params, but passed %d", paramSignature, paramCount)
 	}
 
-	ce := me.newCallEngine()
+	ce := e.newCallEngine()
 	defer func() {
 		// If the module closed during the call, and the call didn't err for another reason, set an ExitError.
 		if err == nil {
@@ -1363,7 +1426,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 			} else { // Float64
 				const mask uint64 = 1 << 63
-				ce.pushValue(uint64(ce.popValue() &^ mask))
+				ce.pushValue(ce.popValue() &^ mask)
 			}
 			frame.pc++
 		case wazeroir.OperationKindNeg:
@@ -1384,7 +1447,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 				ce.pushValue(uint64(math.Float32bits(float32(v))))
 			} else { // Float64
-				v := math.Ceil(float64(math.Float64frombits(ce.popValue())))
+				v := math.Ceil(math.Float64frombits(ce.popValue()))
 				ce.pushValue(math.Float64bits(v))
 			}
 			frame.pc++
@@ -1395,7 +1458,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 				ce.pushValue(uint64(math.Float32bits(float32(v))))
 			} else { // Float64
-				v := math.Floor(float64(math.Float64frombits(ce.popValue())))
+				v := math.Floor(math.Float64frombits(ce.popValue()))
 				ce.pushValue(math.Float64bits(v))
 			}
 			frame.pc++
@@ -1406,7 +1469,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 				ce.pushValue(uint64(math.Float32bits(float32(v))))
 			} else { // Float64
-				v := math.Trunc(float64(math.Float64frombits(ce.popValue())))
+				v := math.Trunc(math.Float64frombits(ce.popValue()))
 				ce.pushValue(math.Float64bits(v))
 			}
 			frame.pc++
@@ -1428,7 +1491,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 				ce.pushValue(uint64(math.Float32bits(float32(v))))
 			} else { // Float64
-				v := math.Sqrt(float64(math.Float64frombits(ce.popValue())))
+				v := math.Sqrt(math.Float64frombits(ce.popValue()))
 				ce.pushValue(math.Float64bits(v))
 			}
 			frame.pc++
@@ -1883,8 +1946,8 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 			ce.pushValue(hi)
 			frame.pc++
 		case wazeroir.OperationKindV128Add:
-			xHigh, xLow := ce.popValue(), ce.popValue()
 			yHigh, yLow := ce.popValue(), ce.popValue()
+			xHigh, xLow := ce.popValue(), ce.popValue()
 			switch op.b1 {
 			case wazeroir.ShapeI8x16:
 				ce.pushValue(
@@ -1914,6 +1977,18 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont
 			case wazeroir.ShapeI64x2:
yLow) ce.pushValue(xHigh + yHigh) + case wazeroir.ShapeF32x4: + ce.pushValue( + uint64(math.Float32bits(math.Float32frombits(uint32(xLow))+math.Float32frombits(uint32(yLow)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(xLow>>32))+math.Float32frombits(uint32(yLow>>32)))) << 32), + ) + ce.pushValue( + uint64(math.Float32bits(math.Float32frombits(uint32(xHigh))+math.Float32frombits(uint32(yHigh)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(xHigh>>32))+math.Float32frombits(uint32(yHigh>>32)))) << 32), + ) + case wazeroir.ShapeF64x2: + ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow))) + ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh))) } frame.pc++ case wazeroir.OperationKindV128Sub: @@ -1948,12 +2023,24 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont case wazeroir.ShapeI64x2: ce.pushValue(xLow - yLow) ce.pushValue(xHigh - yHigh) + case wazeroir.ShapeF32x4: + ce.pushValue( + uint64(math.Float32bits(math.Float32frombits(uint32(xLow))-math.Float32frombits(uint32(yLow)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(xLow>>32))-math.Float32frombits(uint32(yLow>>32)))) << 32), + ) + ce.pushValue( + uint64(math.Float32bits(math.Float32frombits(uint32(xHigh))-math.Float32frombits(uint32(yHigh)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(xHigh>>32))-math.Float32frombits(uint32(yHigh>>32)))) << 32), + ) + case wazeroir.ShapeF64x2: + ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow))) + ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh))) } frame.pc++ case wazeroir.OperationKindV128Load: offset := ce.popMemoryOffset(op) switch op.b1 { - case wazeroir.LoadV128Type128: + case wazeroir.V128LoadType128: lo, ok := memoryInst.ReadUint64Le(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -1964,7 +2051,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) } ce.pushValue(hi) - case wazeroir.LoadV128Type8x8s: + case wazeroir.V128LoadType8x8s: data, ok := memoryInst.Read(ctx, offset, 8) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -1975,7 +2062,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont ce.pushValue( uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))), ) - case wazeroir.LoadV128Type8x8u: + case wazeroir.V128LoadType8x8u: data, ok := memoryInst.Read(ctx, offset, 8) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -1986,7 +2073,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont ce.pushValue( uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]), ) - case wazeroir.LoadV128Type16x4s: + case wazeroir.V128LoadType16x4s: data, ok := memoryInst.Read(ctx, offset, 8) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -1999,7 +2086,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 | uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))), ) - case wazeroir.LoadV128Type16x4u: + case wazeroir.V128LoadType16x4u: data, ok := memoryInst.Read(ctx, offset, 8) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -2010,21 +2097,21 
@@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont ce.pushValue( uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])), ) - case wazeroir.LoadV128Type32x2s: + case wazeroir.V128LoadType32x2s: data, ok := memoryInst.Read(ctx, offset, 8) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) } ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data)))) ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:])))) - case wazeroir.LoadV128Type32x2u: + case wazeroir.V128LoadType32x2u: data, ok := memoryInst.Read(ctx, offset, 8) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) } ce.pushValue(uint64(binary.LittleEndian.Uint32(data))) ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:]))) - case wazeroir.LoadV128Type8Splat: + case wazeroir.V128LoadType8Splat: v, ok := memoryInst.ReadByte(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -2033,7 +2120,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v) ce.pushValue(v8) ce.pushValue(v8) - case wazeroir.LoadV128Type16Splat: + case wazeroir.V128LoadType16Splat: v, ok := memoryInst.ReadUint16Le(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -2041,7 +2128,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v) ce.pushValue(v4) ce.pushValue(v4) - case wazeroir.LoadV128Type32Splat: + case wazeroir.V128LoadType32Splat: v, ok := memoryInst.ReadUint32Le(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -2049,21 +2136,21 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont vv := uint64(v)<<32 | uint64(v) ce.pushValue(vv) ce.pushValue(vv) - case wazeroir.LoadV128Type64Splat: + case wazeroir.V128LoadType64Splat: lo, ok := memoryInst.ReadUint64Le(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) } ce.pushValue(lo) ce.pushValue(lo) - case wazeroir.LoadV128Type32zero: + case wazeroir.V128LoadType32zero: lo, ok := memoryInst.ReadUint32Le(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) } ce.pushValue(uint64(lo)) ce.pushValue(0) - case wazeroir.LoadV128Type64zero: + case wazeroir.V128LoadType64zero: lo, ok := memoryInst.ReadUint64Le(ctx, offset) if !ok { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) @@ -2924,9 +3011,1290 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, callCtx *wasm.CallCont ce.pushValue(retLo) ce.pushValue(retHi) frame.pc++ - } - } - ce.popFrame() + case wazeroir.OperationKindV128AddSat: + x2hi, x2Lo := ce.popValue(), ce.popValue() + x1hi, x1Lo := ce.popValue(), ce.popValue() + + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + for i := 0; i < 16; i++ { + var v, w byte + if i < 8 { + v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8)) + } else { + v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8)) + } + + var uv uint64 + if op.b3 { // signed + if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 { + uv = uint64(byte(0x80)) + } else if subbed > math.MaxInt8 { + uv = uint64(byte(0x7f)) + } else { + uv = uint64(byte(int8(subbed))) + } + } else { + if subbed := int64(v) + int64(w); subbed < 0 { + uv = uint64(byte(0)) + } else if subbed > math.MaxUint8 { + uv = uint64(byte(0xff)) + } else { + uv = uint64(byte(subbed)) + } + } + + if i < 8 { + retLo |= uv 
<< (i * 8) + } else { + retHi |= uv << ((i - 8) * 8) + } + } + case wazeroir.ShapeI16x8: + for i := 0; i < 8; i++ { + var v, w uint16 + if i < 4 { + v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16)) + } else { + v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16)) + } + + var uv uint64 + if op.b3 { // signed + if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 { + uv = uint64(uint16(0x8000)) + } else if added > math.MaxInt16 { + uv = uint64(uint16(0x7fff)) + } else { + uv = uint64(uint16(int16(added))) + } + } else { + if added := int64(v) + int64(w); added < 0 { + uv = uint64(uint16(0)) + } else if added > math.MaxUint16 { + uv = uint64(uint16(0xffff)) + } else { + uv = uint64(uint16(added)) + } + } + + if i < 4 { + retLo |= uv << (i * 16) + } else { + retHi |= uv << ((i - 4) * 16) + } + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128SubSat: + x2hi, x2Lo := ce.popValue(), ce.popValue() + x1hi, x1Lo := ce.popValue(), ce.popValue() + + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + for i := 0; i < 16; i++ { + var v, w byte + if i < 8 { + v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8)) + } else { + v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8)) + } + + var uv uint64 + if op.b3 { // signed + if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 { + uv = uint64(byte(0x80)) + } else if subbed > math.MaxInt8 { + uv = uint64(byte(0x7f)) + } else { + uv = uint64(byte(int8(subbed))) + } + } else { + if subbed := int64(v) - int64(w); subbed < 0 { + uv = uint64(byte(0)) + } else if subbed > math.MaxUint8 { + uv = uint64(byte(0xff)) + } else { + uv = uint64(byte(subbed)) + } + } + + if i < 8 { + retLo |= uv << (i * 8) + } else { + retHi |= uv << ((i - 8) * 8) + } + } + case wazeroir.ShapeI16x8: + for i := 0; i < 8; i++ { + var v, w uint16 + if i < 4 { + v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16)) + } else { + v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16)) + } + + var uv uint64 + if op.b3 { // signed + if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 { + uv = uint64(uint16(0x8000)) + } else if subbed > math.MaxInt16 { + uv = uint64(uint16(0x7fff)) + } else { + uv = uint64(uint16(int16(subbed))) + } + } else { + if subbed := int64(v) - int64(w); subbed < 0 { + uv = uint64(uint16(0)) + } else if subbed > math.MaxUint16 { + uv = uint64(uint16(0xffff)) + } else { + uv = uint64(uint16(subbed)) + } + } + + if i < 4 { + retLo |= uv << (i * 16) + } else { + retHi |= uv << ((i - 4) * 16) + } + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Mul: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI16x8: + retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) | + (uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48) + retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) | + (uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48) + case wazeroir.ShapeI32x4: + retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32) + retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32) + case wazeroir.ShapeI64x2: + retHi = x1hi * x2hi + retLo = x1lo * x2lo + case wazeroir.ShapeF32x4: 
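+ // Each 64-bit half packs two f32 lanes (bits [0,32) and [32,64)), so each half is multiplied lane-wise and the results repacked.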
+ retHi = uint64(math.Float32bits(math.Float32frombits(uint32(x1hi))*math.Float32frombits(uint32(x2hi)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(x1hi>>32))*math.Float32frombits(uint32(x2hi>>32)))) << 32) + retLo = uint64(math.Float32bits(math.Float32frombits(uint32(x1lo))*math.Float32frombits(uint32(x2lo)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(x1lo>>32))*math.Float32frombits(uint32(x2lo>>32)))) << 32) + case wazeroir.ShapeF64x2: + retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi)) + retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo)) + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Div: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + if op.b1 == wazeroir.ShapeF64x2 { + retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi)) + retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo)) + } else { + retHi = uint64(math.Float32bits(math.Float32frombits(uint32(x1hi))/math.Float32frombits(uint32(x2hi)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(x1hi>>32))/math.Float32frombits(uint32(x2hi>>32)))) << 32) + retLo = uint64(math.Float32bits(math.Float32frombits(uint32(x1lo))/math.Float32frombits(uint32(x2lo)))) | + (uint64(math.Float32bits(math.Float32frombits(uint32(x1lo>>32))/math.Float32frombits(uint32(x2lo>>32)))) << 32) + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Neg: + hi, lo := ce.popValue(), ce.popValue() + switch op.b1 { + case wazeroir.ShapeI8x16: + lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) | + (uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) | + (uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) | + (uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56) + hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) | + (uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) | + (uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) | + (uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56) + case wazeroir.ShapeI16x8: + hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) | + (uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48) + lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) | + (uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48) + case wazeroir.ShapeI32x4: + hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32) + lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32) + case wazeroir.ShapeI64x2: + hi = -hi + lo = -lo + case wazeroir.ShapeF32x4: + hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) | + (uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32) + lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) | + (uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32) + case wazeroir.ShapeF64x2: + hi = math.Float64bits(-math.Float64frombits(hi)) + lo = math.Float64bits(-math.Float64frombits(lo)) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Sqrt: + hi, lo := ce.popValue(), ce.popValue() + if op.b1 == wazeroir.ShapeF64x2 { + hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi))) + lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo))) + } else { + hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) | + 
(uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32) + lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) | + (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Abs: + hi, lo := ce.popValue(), ce.popValue() + switch op.b1 { + case wazeroir.ShapeI8x16: + lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) | + (uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) | + (uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) | + (uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56) + hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) | + (uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) | + (uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) | + (uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56) + case wazeroir.ShapeI16x8: + hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) | + (uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48) + lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) | + (uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48) + case wazeroir.ShapeI32x4: + hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32) + lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32) + case wazeroir.ShapeI64x2: + if int64(hi) < 0 { + hi = -hi + } + if int64(lo) < 0 { + lo = -lo + } + case wazeroir.ShapeF32x4: + hi = hi &^ (1<<31 | 1<<63) + lo = lo &^ (1<<31 | 1<<63) + case wazeroir.ShapeF64x2: + hi = hi &^ (1 << 63) + lo = lo &^ (1 << 63) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Popcnt: + hi, lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + for i := 0; i < 16; i++ { + var v byte + if i < 8 { + v = byte(lo >> (i * 8)) + } else { + v = byte(hi >> ((i - 8) * 8)) + } + + var cnt uint64 + for i := 0; i < 8; i++ { + if (v>>i)&0b1 != 0 { + cnt++ + } + } + + if i < 8 { + retLo |= cnt << (i * 8) + } else { + retHi |= cnt << ((i - 8) * 8) + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Min: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + if op.b3 { // signed + retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) | + uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) | + uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } else { + retLo = uint64(i8MinU(uint8(x1lo>>8), 
uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) | + uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) | + uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } + case wazeroir.ShapeI16x8: + if op.b3 { // signed + retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) | + uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) | + uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } else { + retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) | + uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) | + uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } + case wazeroir.ShapeI32x4: + if op.b3 { // signed + retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) | + uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) | + uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } else { + retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) | + uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) | + uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } + case wazeroir.ShapeF32x4: + retHi = uint64(math.Float32bits(float32(moremath.WasmCompatMin( + float64(math.Float32frombits(uint32(x1hi))), + float64(math.Float32frombits(uint32(x2hi))), + )))) | uint64(math.Float32bits(float32(moremath.WasmCompatMin( + float64(math.Float32frombits(uint32(x1hi>>32))), + float64(math.Float32frombits(uint32(x2hi>>32))), + ))))<<32 + retLo = uint64(math.Float32bits(float32(moremath.WasmCompatMin( + float64(math.Float32frombits(uint32(x1lo))), + float64(math.Float32frombits(uint32(x2lo))), + )))) | uint64(math.Float32bits(float32(moremath.WasmCompatMin( + float64(math.Float32frombits(uint32(x1lo>>32))), + float64(math.Float32frombits(uint32(x2lo>>32))), + ))))<<32 + case wazeroir.ShapeF64x2: + retHi = math.Float64bits(moremath.WasmCompatMin( + math.Float64frombits(x1hi), + math.Float64frombits(x2hi), + )) + retLo = math.Float64bits(moremath.WasmCompatMin( + math.Float64frombits(x1lo), + math.Float64frombits(x2lo), + )) + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Max: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch 
op.b1 { + case wazeroir.ShapeI8x16: + if op.b3 { // signed + retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) | + uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) | + uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } else { + retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) | + uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) | + uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } + case wazeroir.ShapeI16x8: + if op.b3 { // signed + retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) | + uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) | + uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } else { + retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) | + uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) | + uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } + case wazeroir.ShapeI32x4: + if op.b3 { // signed + retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) | + uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) | + uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } else { + retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) | + uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) | + uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } + case wazeroir.ShapeF32x4: + retHi = uint64(math.Float32bits(float32(moremath.WasmCompatMax( + float64(math.Float32frombits(uint32(x1hi))), + float64(math.Float32frombits(uint32(x2hi))), + )))) | 
uint64(math.Float32bits(float32(moremath.WasmCompatMax( + float64(math.Float32frombits(uint32(x1hi>>32))), + float64(math.Float32frombits(uint32(x2hi>>32))), + ))))<<32 + retLo = uint64(math.Float32bits(float32(moremath.WasmCompatMax( + float64(math.Float32frombits(uint32(x1lo))), + float64(math.Float32frombits(uint32(x2lo))), + )))) | uint64(math.Float32bits(float32(moremath.WasmCompatMax( + float64(math.Float32frombits(uint32(x1lo>>32))), + float64(math.Float32frombits(uint32(x2lo>>32))), + ))))<<32 + case wazeroir.ShapeF64x2: + retHi = math.Float64bits(moremath.WasmCompatMax( + math.Float64frombits(x1hi), + math.Float64frombits(x2hi), + )) + retLo = math.Float64bits(moremath.WasmCompatMax( + math.Float64frombits(x1lo), + math.Float64frombits(x2lo), + )) + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128AvgrU: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) | + uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) | + uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + case wazeroir.ShapeI16x8: + retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) | + uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) | + uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Pmin: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + if op.b1 == wazeroir.ShapeF32x4 { + if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) { + retLo = x2lo & 0x00000000_ffffffff + } else { + retLo = x1lo & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) { + retLo |= x2lo & 0xffffffff_00000000 + } else { + retLo |= x1lo & 0xffffffff_00000000 + } + if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) { + retHi = x2hi & 0x00000000_ffffffff + } else { + retHi = x1hi & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) { + retHi |= 
x2hi & 0xffffffff_00000000 + } else { + retHi |= x1hi & 0xffffffff_00000000 + } + } else { + if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) { + retLo = x2lo + } else { + retLo = x1lo + } + if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) { + retHi = x2hi + } else { + retHi = x1hi + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Pmax: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + if op.b1 == wazeroir.ShapeF32x4 { + if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) { + retLo = x2lo & 0x00000000_ffffffff + } else { + retLo = x1lo & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) { + retLo |= x2lo & 0xffffffff_00000000 + } else { + retLo |= x1lo & 0xffffffff_00000000 + } + if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) { + retHi = x2hi & 0x00000000_ffffffff + } else { + retHi = x1hi & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) { + retHi |= x2hi & 0xffffffff_00000000 + } else { + retHi |= x1hi & 0xffffffff_00000000 + } + } else { + if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) { + retLo = x2lo + } else { + retLo = x1lo + } + if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) { + retHi = x2hi + } else { + retHi = x1hi + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Ceil: + hi, lo := ce.popValue(), ce.popValue() + if op.b1 == wazeroir.ShapeF32x4 { + lo = uint64(math.Float32bits(float32(math.Ceil(float64(math.Float32frombits(uint32(lo))))))) | + (uint64(math.Float32bits(float32(math.Ceil(float64(math.Float32frombits(uint32(lo>>32))))))) << 32) + hi = uint64(math.Float32bits(float32(math.Ceil(float64(math.Float32frombits(uint32(hi))))))) | + (uint64(math.Float32bits(float32(math.Ceil(float64(math.Float32frombits(uint32(hi>>32))))))) << 32) + } else { + lo = math.Float64bits(math.Ceil(math.Float64frombits(lo))) + hi = math.Float64bits(math.Ceil(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Floor: + hi, lo := ce.popValue(), ce.popValue() + if op.b1 == wazeroir.ShapeF32x4 { + lo = uint64(math.Float32bits(float32(math.Floor(float64(math.Float32frombits(uint32(lo))))))) | + (uint64(math.Float32bits(float32(math.Floor(float64(math.Float32frombits(uint32(lo>>32))))))) << 32) + hi = uint64(math.Float32bits(float32(math.Floor(float64(math.Float32frombits(uint32(hi))))))) | + (uint64(math.Float32bits(float32(math.Floor(float64(math.Float32frombits(uint32(hi>>32))))))) << 32) + } else { + lo = math.Float64bits(math.Floor(math.Float64frombits(lo))) + hi = math.Float64bits(math.Floor(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Trunc: + hi, lo := ce.popValue(), ce.popValue() + if op.b1 == wazeroir.ShapeF32x4 { + lo = uint64(math.Float32bits(float32(math.Trunc(float64(math.Float32frombits(uint32(lo))))))) | + (uint64(math.Float32bits(float32(math.Trunc(float64(math.Float32frombits(uint32(lo>>32))))))) << 32) + hi = uint64(math.Float32bits(float32(math.Trunc(float64(math.Float32frombits(uint32(hi))))))) | + (uint64(math.Float32bits(float32(math.Trunc(float64(math.Float32frombits(uint32(hi>>32))))))) << 32) + } else { + lo = 
math.Float64bits(math.Trunc(math.Float64frombits(lo))) + hi = math.Float64bits(math.Trunc(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Nearest: + hi, lo := ce.popValue(), ce.popValue() + if op.b1 == wazeroir.ShapeF32x4 { + lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) | + (uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32) + hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) | + (uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32) + } else { + lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo))) + hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case wazeroir.OperationKindV128Extend: + hi, lo := ce.popValue(), ce.popValue() + var origin uint64 + if op.b3 { // use lower 64 bits + origin = lo + } else { + origin = hi + } + + signed := op.b2 == 1 + + var retHi, retLo uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + for i := 0; i < 8; i++ { + v8 := byte(origin >> (i * 8)) + + var v16 uint16 + if signed { + v16 = uint16(int8(v8)) + } else { + v16 = uint16(v8) + } + + if i < 4 { + retLo |= uint64(v16) << (i * 16) + } else { + retHi |= uint64(v16) << ((i - 4) * 16) + } + } + case wazeroir.ShapeI16x8: + for i := 0; i < 4; i++ { + v16 := uint16(origin >> (i * 16)) + + var v32 uint32 + if signed { + v32 = uint32(int16(v16)) + } else { + v32 = uint32(v16) + } + + if i < 2 { + retLo |= uint64(v32) << (i * 32) + } else { + retHi |= uint64(v32) << ((i - 2) * 32) + } + } + case wazeroir.ShapeI32x4: + v32Lo := uint32(origin) + v32Hi := uint32(origin >> 32) + if signed { + retLo = uint64(int32(v32Lo)) + retHi = uint64(int32(v32Hi)) + } else { + retLo = uint64(v32Lo) + retHi = uint64(v32Hi) + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128ExtMul: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + var x1, x2 uint64 + if op.b3 { // use lower 64 bits + x1, x2 = x1Lo, x2Lo + } else { + x1, x2 = x1Hi, x2Hi + } + + signed := op.b2 == 1 + + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + for i := 0; i < 8; i++ { + v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8)) + + var v16 uint16 + if signed { + v16 = uint16(int16(int8(v1)) * int16(int8(v2))) + } else { + v16 = uint16(v1) * uint16(v2) + } + + if i < 4 { + retLo |= uint64(v16) << (i * 16) + } else { + retHi |= uint64(v16) << ((i - 4) * 16) + } + } + case wazeroir.ShapeI16x8: + for i := 0; i < 4; i++ { + v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16)) + + var v32 uint32 + if signed { + v32 = uint32(int32(int16(v1)) * int32(int16(v2))) + } else { + v32 = uint32(v1) * uint32(v2) + } + + if i < 2 { + retLo |= uint64(v32) << (i * 32) + } else { + retHi |= uint64(v32) << ((i - 2) * 32) + } + } + case wazeroir.ShapeI32x4: + v1Lo, v2Lo := uint32(x1), uint32(x2) + v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32) + if signed { + retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo))) + retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi))) + } else { + retLo = uint64(v1Lo) * uint64(v2Lo) + retHi = uint64(v1Hi) * uint64(v2Hi) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Q15mulrSatS: + x2hi, x2Lo := ce.popValue(), ce.popValue() + x1hi, x1Lo := 
ce.popValue(), ce.popValue() + var retLo, retHi uint64 + for i := 0; i < 8; i++ { + var v, w int16 + if i < 4 { + v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16))) + } else { + v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16))) + } + + var uv uint64 + // https://github.com/WebAssembly/spec/blob/main/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication + if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 { + uv = uint64(uint16(0x8000)) + } else if calc > math.MaxInt16 { + uv = uint64(uint16(0x7fff)) + } else { + uv = uint64(uint16(int16(calc))) + } + + if i < 4 { + retLo |= uv << (i * 16) + } else { + retHi |= uv << ((i - 4) * 16) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128ExtAddPairwise: + hi, lo := ce.popValue(), ce.popValue() + + signed := op.b3 + + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI8x16: + for i := 0; i < 8; i++ { + var v1, v2 byte + if i < 4 { + v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8)) + } else { + v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8)) + } + + var v16 uint16 + if signed { + v16 = uint16(int16(int8(v1)) + int16(int8(v2))) + } else { + v16 = uint16(v1) + uint16(v2) + } + + if i < 4 { + retLo |= uint64(v16) << (i * 16) + } else { + retHi |= uint64(v16) << ((i - 4) * 16) + } + } + case wazeroir.ShapeI16x8: + for i := 0; i < 4; i++ { + var v1, v2 uint16 + if i < 2 { + v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16)) + } else { + v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16)) + } + + var v32 uint32 + if signed { + v32 = uint32(int32(int16(v1)) + int32(int16(v2))) + } else { + v32 = uint32(v1) + uint32(v2) + } + + if i < 2 { + retLo |= uint64(v32) << (i * 32) + } else { + retHi |= uint64(v32) << ((i - 2) * 32) + } + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128FloatPromote: + hi, lo := ce.popValue(), ce.popValue() + ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(lo))))) + ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(hi))))) + frame.pc++ + case wazeroir.OperationKindV128FloatDemote: + hi, lo := ce.popValue(), ce.popValue() + ce.pushValue( + uint64(math.Float32bits(float32(math.Float64frombits(lo)))) | + (uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32), + ) + ce.pushValue(0) + frame.pc++ + case wazeroir.OperationKindV128FConvertFromI: + hi, lo := ce.popValue(), ce.popValue() + v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32) + signed := op.b3 + + var retLo, retHi uint64 + switch op.b1 { // Destination shape. 
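+ // v1..v4 hold the four 32-bit source lanes in ascending lane order; op.b3 selects the signed interpretation of those lanes.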
+ case wazeroir.ShapeF32x4: // f32x4 from signed/unsigned i32x4 + if signed { + retLo = uint64(math.Float32bits(float32(int32(v1)))) | + (uint64(math.Float32bits(float32(int32(v2)))) << 32) + retHi = uint64(math.Float32bits(float32(int32(v3)))) | + (uint64(math.Float32bits(float32(int32(v4)))) << 32) + } else { + retLo = uint64(math.Float32bits(float32(v1))) | + (uint64(math.Float32bits(float32(v2))) << 32) + retHi = uint64(math.Float32bits(float32(v3))) | + (uint64(math.Float32bits(float32(v4))) << 32) + } + case wazeroir.ShapeF64x2: // f64x2 from signed/unsigned i32x4 + if signed { + retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2))) + } else { + retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2)) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Narrow: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + signed := op.b3 + + var retLo, retHi uint64 + switch op.b1 { + case wazeroir.ShapeI16x8: // signed/unsigned i16x8 to i8x16 + for i := 0; i < 8; i++ { + var v16 uint16 + if i < 4 { + v16 = uint16(x1Lo >> (i * 16)) + } else { + v16 = uint16(x1Hi >> ((i - 4) * 16)) + } + + var v byte + if signed { + if s := int16(v16); s > math.MaxInt8 { + v = math.MaxInt8 + } else if s < math.MinInt8 { + s = math.MinInt8 + v = byte(s) + } else { + v = byte(v16) + } + } else { + if s := int16(v16); s > math.MaxUint8 { + v = math.MaxUint8 + } else if s < 0 { + v = 0 + } else { + v = byte(v16) + } + } + retLo |= uint64(v) << (i * 8) + } + for i := 0; i < 8; i++ { + var v16 uint16 + if i < 4 { + v16 = uint16(x2Lo >> (i * 16)) + } else { + v16 = uint16(x2Hi >> ((i - 4) * 16)) + } + + var v byte + if signed { + if s := int16(v16); s > math.MaxInt8 { + v = math.MaxInt8 + } else if s < math.MinInt8 { + s = math.MinInt8 + v = byte(s) + } else { + v = byte(v16) + } + } else { + if s := int16(v16); s > math.MaxUint8 { + v = math.MaxUint8 + } else if s < 0 { + v = 0 + } else { + v = byte(v16) + } + } + retHi |= uint64(v) << (i * 8) + } + case wazeroir.ShapeI32x4: // signed/unsigned i32x4 to i16x8 + for i := 0; i < 4; i++ { + var v32 uint32 + if i < 2 { + v32 = uint32(x1Lo >> (i * 32)) + } else { + v32 = uint32(x1Hi >> ((i - 2) * 32)) + } + + var v uint16 + if signed { + if s := int32(v32); s > math.MaxInt16 { + v = math.MaxInt16 + } else if s < math.MinInt16 { + s = math.MinInt16 + v = uint16(s) + } else { + v = uint16(v32) + } + } else { + if s := int32(v32); s > math.MaxUint16 { + v = math.MaxUint16 + } else if s < 0 { + v = 0 + } else { + v = uint16(v32) + } + } + retLo |= uint64(v) << (i * 16) + } + + for i := 0; i < 4; i++ { + var v32 uint32 + if i < 2 { + v32 = uint32(x2Lo >> (i * 32)) + } else { + v32 = uint32(x2Hi >> ((i - 2) * 32)) + } + + var v uint16 + if signed { + if s := int32(v32); s > math.MaxInt16 { + v = math.MaxInt16 + } else if s < math.MinInt16 { + s = math.MinInt16 + v = uint16(s) + } else { + v = uint16(v32) + } + } else { + if s := int32(v32); s > math.MaxUint16 { + v = math.MaxUint16 + } else if s < 0 { + v = 0 + } else { + v = uint16(v32) + } + } + retHi |= uint64(v) << (i * 16) + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case wazeroir.OperationKindV128Dot: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + ce.pushValue( + uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) | + 
(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32), + ) + ce.pushValue( + uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) | + (uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32), + ) + frame.pc++ + case wazeroir.OperationKindV128ITruncSatFromF: + hi, lo := ce.popValue(), ce.popValue() + signed := op.b3 + var retLo, retHi uint64 + + switch op.b1 { + case wazeroir.ShapeF32x4: // f32x4 to i32x4 + for i, f64 := range [4]float64{ + math.Trunc(float64(math.Float32frombits(uint32(lo)))), + math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))), + math.Trunc(float64(math.Float32frombits(uint32(hi)))), + math.Trunc(float64(math.Float32frombits(uint32(hi >> 32))))} { + + var v uint32 + if math.IsNaN(f64) { + v = 0 + } else if signed { + if f64 < math.MinInt32 { + f64 = math.MinInt32 + } else if f64 > math.MaxInt32 { + f64 = math.MaxInt32 + } + v = uint32(int32(f64)) + } else { + if f64 < 0 { + f64 = 0 + } else if f64 > math.MaxUint32 { + f64 = math.MaxUint32 + } + v = uint32(f64) + } + + if i < 2 { + retLo |= uint64(v) << (i * 32) + } else { + retHi |= uint64(v) << ((i - 2) * 32) + } + } + + case wazeroir.ShapeF64x2: // f64x2 to i32x4 + for i, f := range [2]float64{ + math.Trunc(math.Float64frombits(lo)), + math.Trunc(math.Float64frombits(hi)), + } { + var v uint32 + if math.IsNaN(f) { + v = 0 + } else if signed { + if f < math.MinInt32 { + f = math.MinInt32 + } else if f > math.MaxInt32 { + f = math.MaxInt32 + } + v = uint32(int32(f)) + } else { + if f < 0 { + f = 0 + } else if f > math.MaxUint32 { + f = math.MaxUint32 + } + v = uint32(f) + } + + retLo |= uint64(v) << (i * 32) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + } + } + ce.popFrame() +} + +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2 +func flt32(z1, z2 float32) bool { + if z1 != z1 || z2 != z2 { + return false + } else if z1 == z2 { + return false + } else if math.IsInf(float64(z1), 1) { + return false + } else if math.IsInf(float64(z1), -1) { + return true + } else if math.IsInf(float64(z2), 1) { + return true + } else if math.IsInf(float64(z2), -1) { + return false + } + return z1 < z2 +} + +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2 +func flt64(z1, z2 float64) bool { + if z1 != z1 || z2 != z2 { + return false + } else if z1 == z2 { + return false + } else if math.IsInf(z1, 1) { + return false + } else if math.IsInf(z1, -1) { + return true + } else if math.IsInf(z2, 1) { + return true + } else if math.IsInf(z2, -1) { + return false + } + return z1 < z2 +} + +func i8RoundingAverage(v1, v2 byte) byte { + // https://github.com/WebAssembly/spec/blob/main/proposals/simd/SIMD.md#lane-wise-integer-rounding-average + return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2) +} + +func i16RoundingAverage(v1, v2 uint16) uint16 { + // https://github.com/WebAssembly/spec/blob/main/proposals/simd/SIMD.md#lane-wise-integer-rounding-average + return uint16((uint32(v1) + uint32(v2) + 1) / 2) +} + +func i8Abs(v byte) byte { + if i := int8(v); i < 0 { + return byte(-i) + } else { + return byte(i) + } +} + +func i8MaxU(v1, v2 byte) byte { + if v1 < v2 { + return v2 + } else { + return v1 + } +} + +func i8MinU(v1, v2 byte) byte { + if v1 > v2 { + return v2 + } else { + 
return v1 + } +} + +func i8MaxS(v1, v2 byte) byte { + if int8(v1) < int8(v2) { + return v2 + } else { + return v1 + } +} + +func i8MinS(v1, v2 byte) byte { + if int8(v1) > int8(v2) { + return v2 + } else { + return v1 + } +} + +func i16MaxU(v1, v2 uint16) uint16 { + if v1 < v2 { + return v2 + } else { + return v1 + } +} + +func i16MinU(v1, v2 uint16) uint16 { + if v1 > v2 { + return v2 + } else { + return v1 + } +} + +func i16MaxS(v1, v2 uint16) uint16 { + if int16(v1) < int16(v2) { + return v2 + } else { + return v1 + } +} + +func i16MinS(v1, v2 uint16) uint16 { + if int16(v1) > int16(v2) { + return v2 + } else { + return v1 + } +} + +func i32MaxU(v1, v2 uint32) uint32 { + if v1 < v2 { + return v2 + } else { + return v1 + } +} + +func i32MinU(v1, v2 uint32) uint32 { + if v1 > v2 { + return v2 + } else { + return v1 + } +} + +func i32MaxS(v1, v2 uint32) uint32 { + if int32(v1) < int32(v2) { + return v2 + } else { + return v1 + } +} + +func i32MinS(v1, v2 uint32) uint32 { + if int32(v1) > int32(v2) { + return v2 + } else { + return v1 + } +} + +func i16Abs(v uint16) uint16 { + if i := int16(v); i < 0 { + return uint16(-i) + } else { + return uint16(i) + } +} + +func i32Abs(v uint32) uint32 { + if i := int32(v); i < 0 { + return uint32(-i) + } else { + return uint32(i) + } } func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, callCtx *wasm.CallContext, f *function, fnl experimental.FunctionListener) context.Context { diff --git a/internal/integration_test/asm/amd64_debug/golang_asm.go b/internal/integration_test/asm/amd64_debug/golang_asm.go index f4bf0b13429..cfbbe0a8faf 100644 --- a/internal/integration_test/asm/amd64_debug/golang_asm.go +++ b/internal/integration_test/asm/amd64_debug/golang_asm.go @@ -560,13 +560,13 @@ var castAsGolangAsmInstruction = [...]obj.As{ amd64.PINSRQ: x86.APINSRQ, amd64.PADDB: x86.APADDB, amd64.PADDW: x86.APADDW, - amd64.PADDL: x86.APADDL, + amd64.PADDD: x86.APADDL, amd64.PADDQ: x86.APADDQ, amd64.ADDPS: x86.AADDPS, amd64.ADDPD: x86.AADDPD, amd64.PSUBB: x86.APSUBB, amd64.PSUBW: x86.APSUBW, - amd64.PSUBL: x86.APSUBL, + amd64.PSUBD: x86.APSUBL, amd64.PSUBQ: x86.APSUBQ, amd64.SUBPS: x86.ASUBPS, amd64.SUBPD: x86.ASUBPD, diff --git a/internal/integration_test/asm/amd64_debug/impl_test.go b/internal/integration_test/asm/amd64_debug/impl_test.go index c6695b1e09e..e932375f256 100644 --- a/internal/integration_test/asm/amd64_debug/impl_test.go +++ b/internal/integration_test/asm/amd64_debug/impl_test.go @@ -812,13 +812,13 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) { }{ {instruction: amd64.PADDB, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.PADDW, srcRegs: floatRegisters, DstRegs: floatRegisters}, - {instruction: amd64.PADDL, srcRegs: floatRegisters, DstRegs: floatRegisters}, + {instruction: amd64.PADDD, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.PADDQ, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.ADDPS, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.ADDPD, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.PSUBB, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.PSUBW, srcRegs: floatRegisters, DstRegs: floatRegisters}, - {instruction: amd64.PSUBL, srcRegs: floatRegisters, DstRegs: floatRegisters}, + {instruction: amd64.PSUBD, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.PSUBQ, srcRegs: floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.SUBPS, srcRegs: 
floatRegisters, DstRegs: floatRegisters}, {instruction: amd64.SUBPD, srcRegs: floatRegisters, DstRegs: floatRegisters}, diff --git a/internal/integration_test/spectest/spectest.go b/internal/integration_test/spectest/spectest.go index 1281bca2a5e..4293c9c8315 100644 --- a/internal/integration_test/spectest/spectest.go +++ b/internal/integration_test/spectest/spectest.go @@ -66,25 +66,51 @@ type ( } commandActionVal struct { - ValType string `json:"type"` - LaneType string `json:"lane_type"` + ValType string `json:"type"` + // LaneType is not empty if ValueType == "v128" + LaneType laneType `json:"lane_type"` Value interface{} `json:"value"` } ) +// laneType is a type of each lane of vector value. +// +// See https://github.com/WebAssembly/wabt/blob/main/docs/wast2json.md#const +type laneType = string + +const ( + laneTypeI8 laneType = "i8" + laneTypeI16 laneType = "i16" + laneTypeI32 laneType = "i32" + laneTypeI64 laneType = "i64" + laneTypeF32 laneType = "f32" + laneTypeF64 laneType = "f64" +) + func (c commandActionVal) String() string { var v string + valTypeStr := c.ValType switch c.ValType { case "i32": v = c.Value.(string) case "f32": - ret, _ := strconv.ParseUint(c.Value.(string), 10, 32) - v = fmt.Sprintf("%f", math.Float32frombits(uint32(ret))) + str := c.Value.(string) + if strings.Contains(str, "nan") { + v = "nan" + } else { + ret, _ := strconv.ParseUint(str, 10, 32) + v = fmt.Sprintf("%f", math.Float32frombits(uint32(ret))) + } case "i64": v = c.Value.(string) case "f64": - ret, _ := strconv.ParseUint(c.Value.(string), 10, 64) - v = fmt.Sprintf("%f", math.Float64frombits(ret)) + str := c.Value.(string) + if strings.Contains(str, "nan") { + v = "nan" + } else { + ret, _ := strconv.ParseUint(str, 10, 64) + v = fmt.Sprintf("%f", math.Float64frombits(ret)) + } case "externref": if c.Value == "null" { v = "null" @@ -107,8 +133,9 @@ func (c commandActionVal) String() string { strs = append(strs, v.(string)) } v = strings.Join(strs, ",") + valTypeStr = fmt.Sprintf("v128[lane=%s]", c.LaneType) } - return fmt.Sprintf("{type: %s, value: %v}", c.ValType, v) + return fmt.Sprintf("{type: %s, value: %v}", valTypeStr, v) } func (c command) String() string { @@ -153,15 +180,14 @@ func (c command) getAssertReturnArgs() []uint64 { return args } -func (c command) getAssertReturnArgsExps() ([]uint64, []uint64) { - var args, exps []uint64 +func (c command) getAssertReturnArgsExps() (args []uint64, exps []uint64) { for _, arg := range c.Action.Args { args = append(args, arg.toUint64s()...) } for _, exp := range c.Exps { exps = append(exps, exp.toUint64s()...) 
} - return args, exps + return } func (c commandActionVal) toUint64s() (ret []uint64) { @@ -170,7 +196,6 @@ func (c commandActionVal) toUint64s() (ret []uint64) { if !ok { panic("BUG") } - var low, high uint64 var width, valNum int switch c.LaneType { case "i8": @@ -188,24 +213,39 @@ func (c commandActionVal) toUint64s() (ret []uint64) { default: panic("BUG") } - for i := 0; i < valNum/2; i++ { - v, err := strconv.ParseUint(strValues[i].(string), 10, width) - if err != nil { - panic(err) + lo, hi := buildLaneUint64(strValues, width, valNum) + return []uint64{lo, hi} + } else { + return []uint64{c.toUint64()} + } +} + +func buildLaneUint64(raw []interface{}, width, valNum int) (lo, hi uint64) { + for i := 0; i < valNum; i++ { + str := raw[i].(string) + + var v uint64 + var err error + if strings.Contains(str, "nan") { + if width == 64 { + v = math.Float64bits(math.NaN()) + } else { + v = uint64(math.Float32bits(float32(math.NaN()))) } - low |= (v << (i * width)) - } - for i := valNum / 2; i < valNum; i++ { - v, err := strconv.ParseUint(strValues[i].(string), 10, width) + } else { + v, err = strconv.ParseUint(str, 10, width) if err != nil { panic(err) } - high |= (v << ((i - valNum/2) * width)) } - return []uint64{low, high} - } else { - return []uint64{c.toUint64()} + + if half := valNum / 2; i < half { + lo |= v << (i * width) + } else { + hi |= v << ((i - half) * width) + } } + return } func (c commandActionVal) toUint64() (ret uint64) { @@ -441,7 +481,14 @@ func Run(t *testing.T, testDataFS embed.FS, newEngine func(wasm.Features) wasm.E vals, types, err := callFunction(ns, moduleName, c.Action.Field, args...) require.NoError(t, err, msg) require.Equal(t, len(exps), len(vals), msg) - requireValuesEq(t, vals, exps, types, msg) + laneTypes := map[int]string{} + for i, expV := range c.Exps { + if expV.ValType == "v128" { + laneTypes[i] = expV.LaneType + } + } + matched, valuesMsg := valuesEq(vals, exps, types, laneTypes) + require.True(t, matched, msg+"\n"+valuesMsg) case "get": _, exps := c.getAssertReturnArgsExps() require.Equal(t, 1, len(exps)) @@ -597,52 +644,157 @@ func testdataPath(filename string) string { return fmt.Sprintf("testdata/%s", filename) } -func requireValuesEq(t *testing.T, actual, exps []uint64, valTypes []wasm.ValueType, msg string) { - var expectedTypesVectorFlattend []wasm.ValueType - for _, tp := range valTypes { - if tp != wasm.ValueTypeV128 { - expectedTypesVectorFlattend = append(expectedTypesVectorFlattend, tp) - } else { - expectedTypesVectorFlattend = append(expectedTypesVectorFlattend, wasm.ValueTypeI64) - expectedTypesVectorFlattend = append(expectedTypesVectorFlattend, wasm.ValueTypeI64) +// valuesEq returns true if all the actual result matches exps which are all expressed as uint64. +// * actual,exps: comparison target values which are all represented as uint64, meaning that if valTypes = [V128,I32], then +// we have actual/exp = [(lower-64bit of the first V128), (higher-64bit of the first V128), I32]. +// * valTypes holds the wasm.ValueType(s) of the original values in Wasm. +// * laneTypes maps the index of valueTypes to laneType if valueTypes[i] == wasm.ValueTypeV128. +// +// Also, if matched == false this returns non-empty valuesMsg which can be used to augment the test failure message. +func valuesEq(actual, exps []uint64, valTypes []wasm.ValueType, laneTypes map[int]laneType) (matched bool, valuesMsg string) { + matched = true + + var msgExpValuesStrs, msgActualValuesStrs []string + var uint64RepPos int // the index to actual and exps slice. 
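+ // matched accumulates across all values so the have/want message covers every value, not just the first mismatch.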
+ for i, tp := range valTypes { + switch tp { + case wasm.ValueTypeI32: + msgExpValuesStrs = append(msgExpValuesStrs, fmt.Sprintf("%d", uint32(exps[uint64RepPos]))) + msgActualValuesStrs = append(msgActualValuesStrs, fmt.Sprintf("%d", uint32(actual[uint64RepPos]))) + matched = matched && (uint32(exps[uint64RepPos]) == uint32(actual[uint64RepPos])) + uint64RepPos++ + case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + msgExpValuesStrs = append(msgExpValuesStrs, fmt.Sprintf("%d", exps[uint64RepPos])) + msgActualValuesStrs = append(msgActualValuesStrs, fmt.Sprintf("%d", actual[uint64RepPos])) + matched = matched && (exps[uint64RepPos] == actual[uint64RepPos]) + uint64RepPos++ + case wasm.ValueTypeF32: + a := math.Float32frombits(uint32(actual[uint64RepPos])) + e := math.Float32frombits(uint32(exps[uint64RepPos])) + msgExpValuesStrs = append(msgExpValuesStrs, fmt.Sprintf("%f", e)) + msgActualValuesStrs = append(msgActualValuesStrs, fmt.Sprintf("%f", a)) + matched = matched && f32Equal(e, a) + uint64RepPos++ + case wasm.ValueTypeF64: + e := math.Float64frombits(exps[uint64RepPos]) + a := math.Float64frombits(actual[uint64RepPos]) + msgExpValuesStrs = append(msgExpValuesStrs, fmt.Sprintf("%f", e)) + msgActualValuesStrs = append(msgActualValuesStrs, fmt.Sprintf("%f", a)) + matched = matched && f64Equal(e, a) + uint64RepPos++ + case wasm.ValueTypeV128: + actualLo, actualHi := actual[uint64RepPos], actual[uint64RepPos+1] + expLo, expHi := exps[uint64RepPos], exps[uint64RepPos+1] + switch laneTypes[i] { + case laneTypeI8: + msgExpValuesStrs = append(msgExpValuesStrs, + fmt.Sprintf("i8x16(%#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x)", + byte(expLo), byte(expLo>>8), byte(expLo>>16), byte(expLo>>24), + byte(expLo>>32), byte(expLo>>40), byte(expLo>>48), byte(expLo>>56), + byte(expHi), byte(expHi>>8), byte(expHi>>16), byte(expHi>>24), + byte(expHi>>32), byte(expHi>>40), byte(expHi>>48), byte(expHi>>56), + ), + ) + msgActualValuesStrs = append(msgActualValuesStrs, + fmt.Sprintf("i8x16(%#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x)", + byte(actualLo), byte(actualLo>>8), byte(actualLo>>16), byte(actualLo>>24), + byte(actualLo>>32), byte(actualLo>>40), byte(actualLo>>48), byte(actualLo>>56), + byte(actualHi), byte(actualHi>>8), byte(actualHi>>16), byte(actualHi>>24), + byte(actualHi>>32), byte(actualHi>>40), byte(actualHi>>48), byte(actualHi>>56), + ), + ) + matched = matched && (expLo == actualLo) && (expHi == actualHi) + case laneTypeI16: + msgExpValuesStrs = append(msgExpValuesStrs, + fmt.Sprintf("i16x8(%#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x)", + uint16(expLo), uint16(expLo>>16), uint16(expLo>>32), uint16(expLo>>48), + uint16(expHi), uint16(expHi>>16), uint16(expHi>>32), uint16(expHi>>48), + ), + ) + msgActualValuesStrs = append(msgActualValuesStrs, + fmt.Sprintf("i16x8(%#x, %#x, %#x, %#x, %#x, %#x, %#x, %#x)", + uint16(actualLo), uint16(actualLo>>16), uint16(actualLo>>32), uint16(actualLo>>48), + uint16(actualHi), uint16(actualHi>>16), uint16(actualHi>>32), uint16(actualHi>>48), + ), + ) + matched = matched && (expLo == actualLo) && (expHi == actualHi) + case laneTypeI32: + msgExpValuesStrs = append(msgExpValuesStrs, + fmt.Sprintf("i32x4(%#x, %#x, %#x, %#x)", uint32(expLo), uint32(expLo>>32), uint32(expHi), uint32(expHi>>32)), + ) + msgActualValuesStrs = append(msgActualValuesStrs, + fmt.Sprintf("i32x4(%#x, %#x, %#x, %#x)", uint32(actualLo), uint32(actualLo>>32), uint32(actualHi), uint32(actualHi>>32)), + ) + 
matched = matched && (expLo == actualLo) && (expHi == actualHi) + case laneTypeI64: + msgExpValuesStrs = append(msgExpValuesStrs, + fmt.Sprintf("i64x2(%#x, %#x)", expLo, expHi), + ) + msgActualValuesStrs = append(msgActualValuesStrs, + fmt.Sprintf("i64x2(%#x, %#x)", actualLo, actualHi), + ) + matched = matched && (expLo == actualLo) && (expHi == actualHi) + case laneTypeF32: + msgExpValuesStrs = append(msgExpValuesStrs, + fmt.Sprintf("f32x4(%f, %f, %f, %f)", + math.Float32frombits(uint32(expLo)), math.Float32frombits(uint32(expLo>>32)), + math.Float32frombits(uint32(expHi)), math.Float32frombits(uint32(expHi>>32)), + ), + ) + msgActualValuesStrs = append(msgActualValuesStrs, + fmt.Sprintf("f32x4(%f, %f, %f, %f)", + math.Float32frombits(uint32(actualLo)), math.Float32frombits(uint32(actualLo>>32)), + math.Float32frombits(uint32(actualHi)), math.Float32frombits(uint32(actualHi>>32)), + ), + ) + matched = matched && + f32Equal(math.Float32frombits(uint32(expLo)), math.Float32frombits(uint32(actualLo))) && + f32Equal(math.Float32frombits(uint32(expLo>>32)), math.Float32frombits(uint32(actualLo>>32))) && + f32Equal(math.Float32frombits(uint32(expHi)), math.Float32frombits(uint32(actualHi))) && + f32Equal(math.Float32frombits(uint32(expHi>>32)), math.Float32frombits(uint32(actualHi>>32))) + case laneTypeF64: + msgExpValuesStrs = append(msgExpValuesStrs, + fmt.Sprintf("f64x2(%f, %f)", math.Float64frombits(expLo), math.Float64frombits(expHi)), + ) + msgActualValuesStrs = append(msgActualValuesStrs, + fmt.Sprintf("f64x2(%f, %f)", math.Float64frombits(actualLo), math.Float64frombits(actualHi)), + ) + matched = matched && + f64Equal(math.Float64frombits(expLo), math.Float64frombits(actualLo)) && + f64Equal(math.Float64frombits(expHi), math.Float64frombits(actualHi)) + default: + panic("BUG") + } + uint64RepPos += 2 + default: + panic("BUG") } } - result := fmt.Sprintf("\thave (%v)\n\twant (%v)", actual, exps) - for i := range exps { - requireValueEq(t, actual[i], exps[i], expectedTypesVectorFlattend[i], msg+"\n"+result) + if !matched { + valuesMsg = fmt.Sprintf("\thave [%s]\n\twant [%s]", + strings.Join(msgActualValuesStrs, ", "), + strings.Join(msgExpValuesStrs, ", ")) } + return } -func requireValueEq(t *testing.T, actual, expected uint64, valType wasm.ValueType, msg string) { - switch valType { - case wasm.ValueTypeI32: - require.Equal(t, uint32(expected), uint32(actual), msg) - case wasm.ValueTypeI64: - require.Equal(t, expected, actual, msg) - case wasm.ValueTypeF32: - expF := math.Float32frombits(uint32(expected)) - actualF := math.Float32frombits(uint32(actual)) - if math.IsNaN(float64(expF)) { // NaN cannot be compared with themselves, so we have to use IsNaN - require.True(t, math.IsNaN(float64(actualF)), msg) - } else { - require.Equal(t, expF, actualF, msg) - } - case wasm.ValueTypeF64: - expF := math.Float64frombits(expected) - actualF := math.Float64frombits(actual) - if math.IsNaN(expF) { // NaN cannot be compared with themselves, so we have to use IsNaN - require.True(t, math.IsNaN(actualF), msg) - } else { - require.Equal(t, expF, actualF, msg) - } - case wasm.ValueTypeExternref: - require.Equal(t, expected, actual, msg) - case wasm.ValueTypeFuncref: - require.Equal(t, expected, actual, msg) - default: - t.Fatal(msg) +func f32Equal(expected, actual float32) (matched bool) { + if math.IsNaN(float64(expected)) { // NaN cannot be compared with themselves, so we have to use IsNaN + matched = math.IsNaN(float64(actual)) + } else { + matched = expected == actual + } + return +} + +func 
f64Equal(expected, actual float64) (matched bool) { + if math.IsNaN(expected) { // NaN cannot be compared with themselves, so we have to use IsNaN + matched = math.IsNaN(actual) + } else { + matched = expected == actual } + return } // callFunction is inlined here as the spectest needs to validate the signature was correct diff --git a/internal/integration_test/spectest/spectest_test.go b/internal/integration_test/spectest/spectest_test.go new file mode 100644 index 00000000000..b11ef1dc639 --- /dev/null +++ b/internal/integration_test/spectest/spectest_test.go @@ -0,0 +1,581 @@ +package spectest + +import ( + "encoding/json" + "math" + "testing" + + "github.com/tetratelabs/wazero/internal/testing/require" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func Test_f32Equal(t *testing.T) { + tests := []struct { + name string + f1, f2 float32 + exp bool + }{ + {name: "1", f1: 1.1, f2: 1.1, exp: true}, + {name: "2", f1: float32(math.NaN()), f2: float32(math.NaN()), exp: true}, + {name: "3", f1: float32(math.Inf(1)), f2: float32(math.Inf(1)), exp: true}, + {name: "4", f1: float32(math.Inf(-1)), f2: float32(math.Inf(-1)), exp: true}, + {name: "5", f1: 1.1, f2: -1.1, exp: false}, + {name: "6", f1: float32(math.NaN()), f2: -1.1, exp: false}, + {name: "7", f1: -1.1, f2: float32(math.NaN()), exp: false}, + {name: "8", f1: float32(math.NaN()), f2: float32(math.Inf(1)), exp: false}, + {name: "9", f1: float32(math.Inf(1)), f2: float32(math.NaN()), exp: false}, + {name: "10", f1: float32(math.NaN()), f2: float32(math.Inf(-1)), exp: false}, + {name: "11", f1: float32(math.Inf(-1)), f2: float32(math.NaN()), exp: false}, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.exp, f32Equal(tc.f1, tc.f2)) + }) + } +} + +func Test_f64Equal(t *testing.T) { + tests := []struct { + name string + f1, f2 float64 + exp bool + }{ + {name: "1", f1: 1.1, f2: 1.1, exp: true}, + {name: "2", f1: math.NaN(), f2: math.NaN(), exp: true}, + {name: "3", f1: math.Inf(1), f2: math.Inf(1), exp: true}, + {name: "4", f1: math.Inf(-1), f2: math.Inf(-1), exp: true}, + {name: "5", f1: 1.1, f2: -1.1, exp: false}, + {name: "6", f1: math.NaN(), f2: -1.1, exp: false}, + {name: "7", f1: -1.1, f2: math.NaN(), exp: false}, + {name: "8", f1: math.NaN(), f2: math.Inf(1), exp: false}, + {name: "9", f1: math.Inf(1), f2: math.NaN(), exp: false}, + {name: "10", f1: math.NaN(), f2: math.Inf(-1), exp: false}, + {name: "11", f1: math.Inf(-1), f2: math.NaN(), exp: false}, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.exp, f64Equal(tc.f1, tc.f2)) + }) + } +} + +func Test_valuesEq(t *testing.T) { + i32, i64, f32, f64, v128 := wasm.ValueTypeI32, wasm.ValueTypeI64, wasm.ValueTypeF32, wasm.ValueTypeF64, wasm.ValueTypeV128 + tests := []struct { + name string + exps, actual []uint64 + valueTypes []wasm.ValueType + laneTypes map[int]laneType + expMatched bool + expValuesMsg string + }{ + { + name: "matched/i32", + exps: []uint64{0}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{i32}, + expMatched: true, + }, + { + name: "unmatched/i32", + exps: []uint64{1}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{i32}, + expMatched: false, + expValuesMsg: ` have [0] + want [1]`, + }, + { + name: "unmatched/i32", + exps: []uint64{math.MaxUint32}, + actual: []uint64{1123}, + valueTypes: []wasm.ValueType{i32}, + expMatched: false, + expValuesMsg: ` have [1123] + want [4294967295]`, + }, + { + name: "matched/i64", + exps: 
[]uint64{0}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{i64}, + expMatched: true, + }, + { + name: "unmatched/i64", + exps: []uint64{1}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{i64}, + expMatched: false, + expValuesMsg: ` have [0] + want [1]`, + }, + { + name: "unmatched/i64", + exps: []uint64{math.MaxUint64}, + actual: []uint64{1123}, + valueTypes: []wasm.ValueType{i64}, + expMatched: false, + expValuesMsg: ` have [1123] + want [18446744073709551615]`, + }, + { + name: "matched/f32", + exps: []uint64{0}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{f32}, + expMatched: true, + }, + { + name: "unmatched/f32", + exps: []uint64{uint64(math.Float32bits(-13123.1))}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{f32}, + expMatched: false, + expValuesMsg: ` have [0.000000] + want [-13123.099609]`, + }, + { + name: "matched/f64", + exps: []uint64{0}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{f64}, + expMatched: true, + }, + { + name: "unmatched/f64", + exps: []uint64{math.Float64bits(1.0)}, + actual: []uint64{0}, + valueTypes: []wasm.ValueType{f64}, + expMatched: false, + expValuesMsg: ` have [0.000000] + want [1.000000]`, + }, + { + name: "unmatched/f64", + actual: []uint64{math.Float64bits(-1231231.0)}, + exps: []uint64{0}, + valueTypes: []wasm.ValueType{f64}, + expMatched: false, + expValuesMsg: ` have [-1231231.000000] + want [0.000000]`, + }, + { + name: "matched/i8x16", + exps: []uint64{math.MaxUint64, 123}, + actual: []uint64{math.MaxUint64, 123}, + laneTypes: map[int]laneType{0: laneTypeI8}, + valueTypes: []wasm.ValueType{v128}, + expMatched: true, + }, + { + name: "unmatched/i8x16", + exps: []uint64{0, 0xff<<56 | 0xaa}, + actual: []uint64{math.MaxUint64, 0xff<<48 | 0xcc}, + laneTypes: map[int]laneType{0: laneTypeI8}, + valueTypes: []wasm.ValueType{v128}, + expMatched: false, + expValuesMsg: ` have [i8x16(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0)] + want [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xaa, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff)]`, + }, + { + name: "matched/i16x8", + exps: []uint64{math.MaxUint64, 123}, + actual: []uint64{math.MaxUint64, 123}, + laneTypes: map[int]laneType{0: laneTypeI16}, + valueTypes: []wasm.ValueType{v128}, + expMatched: true, + }, + { + name: "unmatched/i16x8", + exps: []uint64{0xffff << 32, 0}, + actual: []uint64{0xaabb << 16, ^uint64(0)}, + laneTypes: map[int]laneType{0: laneTypeI16}, + valueTypes: []wasm.ValueType{v128}, + expMatched: false, + expValuesMsg: ` have [i16x8(0x0, 0xaabb, 0x0, 0x0, 0xffff, 0xffff, 0xffff, 0xffff)] + want [i16x8(0x0, 0x0, 0xffff, 0x0, 0x0, 0x0, 0x0, 0x0)]`, + }, + { + name: "matched/i32x4", + exps: []uint64{math.MaxUint64, 123}, + actual: []uint64{math.MaxUint64, 123}, + laneTypes: map[int]laneType{0: laneTypeI32}, + valueTypes: []wasm.ValueType{v128}, + expMatched: true, + }, + { + name: "unmatched/i32x4", + exps: []uint64{0xffff_ffff<<32 | 0xa, 123}, + actual: []uint64{0x1a1a_1a1a<<32 | 0xa, 123}, + laneTypes: map[int]laneType{0: laneTypeI32}, + valueTypes: []wasm.ValueType{v128}, + expMatched: false, + expValuesMsg: ` have [i32x4(0xa, 0x1a1a1a1a, 0x7b, 0x0)] + want [i32x4(0xa, 0xffffffff, 0x7b, 0x0)]`, + }, + { + name: "matched/i64x2", + exps: []uint64{math.MaxUint64, 123}, + actual: []uint64{math.MaxUint64, 123}, + laneTypes: map[int]laneType{0: laneTypeI64}, + valueTypes: []wasm.ValueType{v128}, + expMatched: true, + }, + { + name: "unmatched/i64x2", + exps: []uint64{math.MaxUint64, 123}, + actual: []uint64{math.MaxUint64, 
0}, + laneTypes: map[int]laneType{0: laneTypeI64}, + valueTypes: []wasm.ValueType{v128}, + expMatched: false, + expValuesMsg: ` have [i64x2(0xffffffffffffffff, 0x0)] + want [i64x2(0xffffffffffffffff, 0x7b)]`, + }, + { + name: "matched/f32x4", + exps: []uint64{ + (uint64(math.Float32bits(float32(math.NaN()))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + (uint64(math.Float32bits(float32(math.NaN()))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + }, + actual: []uint64{ + (uint64(math.Float32bits(float32(math.NaN()))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + (uint64(math.Float32bits(float32(math.NaN()))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + }, + valueTypes: []wasm.ValueType{v128}, + laneTypes: map[int]laneType{0: laneTypeF32}, + expMatched: true, + }, + { + name: "unmatched/f32x4", + exps: []uint64{ + (uint64(math.Float32bits(float32(1.213))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + (uint64(math.Float32bits(float32(math.NaN()))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + }, + actual: []uint64{ + (uint64(math.Float32bits(float32(math.NaN()))) << 32) | uint64(math.Float32bits(float32(math.Inf(1)))), + (uint64(math.Float32bits(float32(math.Inf(-1)))) << 32) | uint64(math.Float32bits(float32(math.NaN()))), + }, + valueTypes: []wasm.ValueType{v128}, + laneTypes: map[int]laneType{0: laneTypeF32}, + expMatched: false, + expValuesMsg: ` have [f32x4(+Inf, NaN, NaN, -Inf)] + want [f32x4(NaN, 1.213000, NaN, NaN)]`, + }, + { + name: "matched/f64x2", + exps: []uint64{math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128}, + laneTypes: map[int]laneType{0: laneTypeF64}, + expMatched: true, + }, + { + name: "unmatched/f64x2", + exps: []uint64{math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{math.Float64bits(-1.0), math.Float64bits(math.Inf(1))}, + valueTypes: []wasm.ValueType{v128}, + laneTypes: map[int]laneType{0: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [f64x2(-1.000000, +Inf)] + want [f64x2(1.000000, NaN)]`, + }, + { + name: "unmatched/f64x2", + exps: []uint64{math.Float64bits(math.Inf(1)), math.Float64bits(math.NaN())}, + actual: []uint64{math.Float64bits(math.Inf(-1)), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128}, + laneTypes: map[int]laneType{0: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [f64x2(-Inf, NaN)] + want [f64x2(+Inf, NaN)]`, + }, + { + name: "matched/[i32,f64x2]", + exps: []uint64{1, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{1, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{i32, v128}, + laneTypes: map[int]laneType{1: laneTypeF64}, + expMatched: true, + }, + { + name: "unmatched/[i32,f64x2]", + exps: []uint64{123, math.Float64bits(math.Inf(1)), math.Float64bits(math.NaN())}, + actual: []uint64{123, math.Float64bits(math.Inf(-1)), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{i32, v128}, + laneTypes: map[int]laneType{1: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [123, f64x2(-Inf, NaN)] + want [123, f64x2(+Inf, NaN)]`, + }, + { + name: "matched/[f64x2,i32]", + exps: []uint64{math.Float64bits(1.0), math.Float64bits(math.NaN()), 1}, + actual: []uint64{math.Float64bits(1.0), math.Float64bits(math.NaN()), 1}, + valueTypes: []wasm.ValueType{v128, i32}, + laneTypes: map[int]laneType{0: laneTypeF64}, + expMatched: true, + }, + 
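// The following cases mix scalar and v128 results; as described in valuesEq's doc comment, e.g. + // valueTypes = [v128, i32] flattens to [v128-lo64, v128-hi64, i32-value] in both exps and actual. +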
{ + name: "unmatched/[f64x2,i32]", + exps: []uint64{math.Float64bits(math.Inf(1)), math.Float64bits(math.NaN()), 123}, + actual: []uint64{math.Float64bits(math.Inf(-1)), math.Float64bits(math.NaN()), 123}, + valueTypes: []wasm.ValueType{v128, i32}, + laneTypes: map[int]laneType{0: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [f64x2(-Inf, NaN), 123] + want [f64x2(+Inf, NaN), 123]`, + }, + { + name: "matched/[f32,i32,f64x2]", + exps: []uint64{uint64(math.Float32bits(float32(math.NaN()))), math.Float64bits(1.0), math.Float64bits(math.NaN()), 1}, + actual: []uint64{uint64(math.Float32bits(float32(math.NaN()))), math.Float64bits(1.0), math.Float64bits(math.NaN()), 1}, + valueTypes: []wasm.ValueType{f32, v128, i32}, + laneTypes: map[int]laneType{1: laneTypeF64}, + expMatched: true, + }, + { + name: "unmatched/[f32,f64x2,i32]", + exps: []uint64{uint64(math.Float32bits(1.0)), math.Float64bits(math.Inf(1)), math.Float64bits(math.NaN()), 123}, + actual: []uint64{uint64(math.Float32bits(1.0)), math.Float64bits(math.Inf(-1)), math.Float64bits(math.NaN()), 123}, + valueTypes: []wasm.ValueType{f32, v128, i32}, + laneTypes: map[int]laneType{1: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [1.000000, f64x2(-Inf, NaN), 123] + want [1.000000, f64x2(+Inf, NaN), 123]`, + }, + { + name: "matched/[i8x16,f64x2]", + exps: []uint64{0, 0, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{0, 0, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128, v128}, + laneTypes: map[int]laneType{0: laneTypeI8, 1: laneTypeF64}, + expMatched: true, + }, + { + name: "unmatched/[i8x16,f64x2]", + exps: []uint64{0, 0xff << 56, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{0, 0xaa << 56, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128, v128}, + laneTypes: map[int]laneType{0: laneTypeI8, 1: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xaa), f64x2(1.000000, NaN)] + want [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff), f64x2(1.000000, NaN)]`, + }, + { + name: "unmatched/[i8x16,f64x2]", + exps: []uint64{0, 0xff << 56, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{0, 0xff << 56, math.Float64bits(1.0), math.Float64bits(math.Inf(1))}, + valueTypes: []wasm.ValueType{v128, v128}, + laneTypes: map[int]laneType{0: laneTypeI8, 1: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff), f64x2(1.000000, +Inf)] + want [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff), f64x2(1.000000, NaN)]`, + }, + { + name: "matched/[i8x16,i32,f64x2]", + exps: []uint64{0, 0, math.MaxUint32, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{0, 0, math.MaxUint32, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128, i32, v128}, + laneTypes: map[int]laneType{0: laneTypeI8, 2: laneTypeF64}, + expMatched: true, + }, + { + name: "matched/[i8x16,i32,f64x2]", + exps: []uint64{0, 0, math.MaxUint32, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{0, 0, math.MaxUint32 - 1, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128, i32, v128}, + laneTypes: map[int]laneType{0: laneTypeI8, 2: laneTypeF64}, + expMatched: false, 
+ expValuesMsg: ` have [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0), 4294967294, f64x2(1.000000, NaN)] + want [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0), 4294967295, f64x2(1.000000, NaN)]`, + }, + { + name: "unmatched/[i8x16,i32,f64x2]", + exps: []uint64{0, 0, math.MaxUint32, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + actual: []uint64{0, 0xff << 16, math.MaxUint32, math.Float64bits(1.0), math.Float64bits(math.NaN())}, + valueTypes: []wasm.ValueType{v128, i32, v128}, + laneTypes: map[int]laneType{0: laneTypeI8, 2: laneTypeF64}, + expMatched: false, + expValuesMsg: ` have [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0, 0x0, 0x0, 0x0, 0x0), 4294967295, f64x2(1.000000, NaN)] + want [i8x16(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0), 4294967295, f64x2(1.000000, NaN)]`, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + actualMatched, actualValuesMsg := valuesEq(tc.actual, tc.exps, tc.valueTypes, tc.laneTypes) + require.Equal(t, tc.expMatched, actualMatched) + require.Equal(t, tc.expValuesMsg, actualValuesMsg) + }) + } +} + +func TestCommandActionVal_toUint64s(t *testing.T) { + tests := []struct { + name string + rawCommandActionVal string + exp []uint64 + }{ + { + name: "i32", + rawCommandActionVal: `{"type": "i32", "value": "0"}`, + exp: []uint64{0}, + }, + { + name: "i32", + rawCommandActionVal: `{"type": "i32", "value": "4294967295"}`, + exp: []uint64{4294967295}, + }, + { + name: "i64", + rawCommandActionVal: `{"type": "i64", "value": "0"}`, + exp: []uint64{0}, + }, + { + name: "i64", + rawCommandActionVal: `{"type": "i64", "value": "7034535277573963776"}`, + exp: []uint64{7034535277573963776}, + }, + { + name: "f32", + rawCommandActionVal: `{"type": "f32", "value": "0"}`, + exp: []uint64{0}, + }, + { + name: "f32", + rawCommandActionVal: `{"type": "f32", "value": "2147483648"}`, + exp: []uint64{2147483648}, + }, + { + name: "f64", + rawCommandActionVal: `{"type": "f64", "value": "0"}`, + exp: []uint64{0}, + }, + { + name: "f64", + rawCommandActionVal: `{"type": "f64", "value": "4616189618054758400"}`, + exp: []uint64{4616189618054758400}, + }, + { + name: "f32x4", + rawCommandActionVal: `{"type": "v128", "lane_type": "f32", "value": ["645922816", "645922816", "645922816", "645922816"]}`, + exp: []uint64{645922816<<32 | 645922816, 645922816<<32 | 645922816}, + }, + { + name: "f32x4", + rawCommandActionVal: `{"type": "v128", "lane_type": "f32", "value": ["nan:canonical", "nan:arithmetic", "nan:canonical", "nan:arithmetic"]}`, + exp: []uint64{ + uint64(math.Float32bits(float32(math.NaN()))) | (uint64(math.Float32bits(float32(math.NaN()))) << 32), + uint64(math.Float32bits(float32(math.NaN()))) | (uint64(math.Float32bits(float32(math.NaN()))) << 32), + }, + }, + { + name: "f64x2", + rawCommandActionVal: `{"type": "v128", "lane_type": "f64", "value": ["9223372036854775808", "9223372036854775808"]}`, + exp: []uint64{9223372036854775808, 9223372036854775808}, + }, + { + name: "f64x2", + rawCommandActionVal: `{"type": "v128", "lane_type": "f64", "value": ["nan:canonical", "nan:arithmetic"]}`, + exp: []uint64{math.Float64bits(math.NaN()), math.Float64bits(math.NaN())}, + }, + { + name: "i8x16", + rawCommandActionVal: `{"type": "v128", "lane_type": "i8", "value": ["128", "129", "130", "131", "253", "254", "255", "0", "0", "1", "2", "127", "128", "253", "254", "255"]}`, + exp: []uint64{ + 128 | (129 << 8) | 
(130 << 16) | (131 << 24) | (253 << 32) | (254 << 40) | (255 << 48), + 1<<8 | 2<<16 | 127<<24 | 128<<32 | 253<<40 | 254<<48 | 255<<56, + }, + }, + { + name: "i16x8", + rawCommandActionVal: `{"type": "v128", "lane_type": "i16", "value": ["256", "770", "1284", "1798", "2312", "2826", "3340", "3854"]}`, + exp: []uint64{ + 256 | 770<<16 | 1284<<32 | 1798<<48, + 2312 | 2826<<16 | 3340<<32 | 3854<<48, + }, + }, + { + name: "i32x4", + rawCommandActionVal: `{"type": "v128", "lane_type": "i32", "value": ["123", "32766", "32766", "40000"]}`, + exp: []uint64{ + 123 | 32766<<32, + 32766 | 40000<<32, + }, + }, + { + name: "i64x2", + rawCommandActionVal: `{"type": "v128", "lane_type": "i64", "value": ["18446744073709551615", "123124"]}`, + exp: []uint64{ + 18446744073709551615, + 123124, + }, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + var c commandActionVal + err := json.Unmarshal([]byte(tc.rawCommandActionVal), &c) + require.NoError(t, err) + actual := c.toUint64s() + require.Equal(t, tc.exp, actual) + }) + } +} + +func TestCommand_getAssertReturnArgsExps(t *testing.T) { + tests := []struct { + name string + rawCommand string + args, exps []uint64 + }{ + { + name: "1", + rawCommand: ` +{ + "type": "assert_return", + "line": 148, + "action": { + "type": "invoke", "field": "f32x4.min", + "args": [ + {"type": "v128", "lane_type": "f32", "value": ["2147483648", "123", "2147483648", "1"]}, + {"type": "v128", "lane_type": "i8", "value": ["128", "129", "130", "131", "253", "254", "255", "0", "0", "1", "2", "127", "128", "253", "254", "255"]} + ] + }, + "expected": [ + {"type": "v128", "lane_type": "f32", "value": ["2147483648", "0", "0", "2147483648"]} + ] +}`, + args: []uint64{ + 123<<32 | 2147483648, + 1<<32 | 2147483648, + 128 | (129 << 8) | (130 << 16) | (131 << 24) | (253 << 32) | (254 << 40) | (255 << 48), + 1<<8 | 2<<16 | 127<<24 | 128<<32 | 253<<40 | 254<<48 | 255<<56, + }, + exps: []uint64{ + 2147483648, + 2147483648 << 32, + }, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + var c command + err := json.Unmarshal([]byte(tc.rawCommand), &c) + require.NoError(t, err) + actualArgs, actualExps := c.getAssertReturnArgsExps() + require.Equal(t, tc.args, actualArgs) + require.Equal(t, tc.exps, actualExps) + }) + } +} diff --git a/internal/integration_test/spectest/v2/spec_test.go b/internal/integration_test/spectest/v2/spec_test.go index 7ebf37d9153..76838f372c6 100644 --- a/internal/integration_test/spectest/v2/spec_test.go +++ b/internal/integration_test/spectest/v2/spec_test.go @@ -4,7 +4,6 @@ import ( "embed" "path" "runtime" - "strings" "testing" "github.com/tetratelabs/wazero/internal/engine/compiler" @@ -26,43 +25,28 @@ func TestCompiler(t *testing.T) { } spectest.Run(t, testcases, compiler.NewEngine, enabledFeatures, func(jsonname string) bool { - // TODO: remove after SIMD proposal - if strings.Contains(jsonname, "simd") { - switch path.Base(jsonname) { - case "simd_address.json", "simd_const.json", "simd_align.json", "simd_load16_lane.json", "simd_load32_lane.json", - "simd_load64_lane.json", "simd_load8_lane.json", "simd_lane.json", "simd_load_extend.json", - "simd_load_splat.json", "simd_load_zero.json", "simd_store.json", "simd_store16_lane.json", - "simd_store32_lane.json", "simd_store64_lane.json", "simd_store8_lane.json": - return true - case "simd_bitwise.json", "simd_boolean.json", "simd_bit_shift.json", - "simd_i8x16_cmp.json", "simd_i16x8_cmp.json", "simd_i32x4_cmp.json", 
"simd_i64x2_cmp.json", - "simd_f32x4_cmp.json", "simd_f64x2_cmp.json": - // TODO: implement on arm64. - return runtime.GOARCH == "amd64" - default: - return false // others not supported, yet! - } + switch path.Base(jsonname) { + case "simd_bitwise.json", "simd_boolean.json", "simd_bit_shift.json", + "simd_i8x16_cmp.json", "simd_i16x8_cmp.json", "simd_i32x4_cmp.json", "simd_i64x2_cmp.json", + "simd_f32x4_cmp.json", "simd_f64x2_cmp.json", "simd_f32x4_arith.json", "simd_f64x2_arith.json", + "simd_i16x8_arith.json", "simd_i64x2_arith.json", "simd_i32x4_arith.json", "simd_i8x16_arith.json", + "simd_i16x8_sat_arith.json", "simd_i8x16_sat_arith.json", + "simd_i16x8_arith2.json", "simd_i8x16_arith2.json", "simd_i32x4_arith2.json", "simd_i64x2_arith2.json", + "simd_f64x2.json", "simd_f32x4.json", "simd_f32x4_rounding.json", "simd_f64x2_rounding.json", + "simd_f64x2_pmin_pmax.json", "simd_f32x4_pmin_pmax.json", "simd_int_to_int_extend.json", + "simd_i64x2_extmul_i32x4.json", "simd_i32x4_extmul_i16x8.json", "simd_i16x8_extmul_i8x16.json", + "simd_i16x8_q15mulr_sat_s.json", "simd_i16x8_extadd_pairwise_i8x16.json", "simd_i32x4_extadd_pairwise_i16x8.json", + "simd_i32x4_dot_i16x8.json", "simd_i32x4_trunc_sat_f32x4.json", + "simd_splat.json", "simd_load.json", "simd_i32x4_trunc_sat_f64x2.json", + "simd_conversions.json": + // TODO: implement on arm64. + return runtime.GOARCH == "amd64" + default: + return true } - return true }) } func TestInterpreter(t *testing.T) { - spectest.Run(t, testcases, interpreter.NewEngine, enabledFeatures, func(jsonname string) bool { - // TODO: remove after SIMD proposal - if strings.Contains(jsonname, "simd") { - switch path.Base(jsonname) { - case "simd_address.json", "simd_const.json", "simd_align.json", "simd_load16_lane.json", - "simd_load32_lane.json", "simd_load64_lane.json", "simd_load8_lane.json", "simd_lane.json", - "simd_load_extend.json", "simd_load_splat.json", "simd_load_zero.json", "simd_store.json", - "simd_store16_lane.json", "simd_store32_lane.json", "simd_store64_lane.json", "simd_store8_lane.json", - "simd_bitwise.json", "simd_boolean.json", "simd_bit_shift.json", "simd_i8x16_cmp.json", "simd_i16x8_cmp.json", - "simd_i32x4_cmp.json", "simd_i64x2_cmp.json", "simd_f32x4_cmp.json", "simd_f64x2_cmp.json": - return true - default: - return false // others not supported, yet! 
- } - } - return true - }) + spectest.Run(t, testcases, interpreter.NewEngine, enabledFeatures, func(string) bool { return true }) } diff --git a/internal/wasm/func_validation.go b/internal/wasm/func_validation.go index 899edda04db..a8cd9f9198a 100644 --- a/internal/wasm/func_validation.go +++ b/internal/wasm/func_validation.go @@ -714,7 +714,7 @@ func (m *Module) validateFunctionWithMaxStackValues( return fmt.Errorf("cannot pop the f64 operand for %s: %v", InstructionName(op), err) } valueTypeStack.push(ValueTypeI64) - case OpcodeF32ConvertI32s, OpcodeF32ConvertI32U: + case OpcodeF32ConvertI32S, OpcodeF32ConvertI32U: if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { return fmt.Errorf("cannot pop the i32 operand for %s: %v", InstructionName(op), err) } @@ -1071,14 +1071,6 @@ func (m *Module) validateFunctionWithMaxStackValues( } pc += 16 valueTypeStack.push(ValueTypeV128) - case OpcodeVecI8x16Add, OpcodeVecI16x8Add, OpcodeVecI32x4Add, OpcodeVecI64x2Add, - OpcodeVecI8x16Sub, OpcodeVecI16x8Sub, OpcodeVecI32x4Sub, OpcodeVecI64x2Sub: - for i := 0; i < 2; i++ { - if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { - return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) - } - } - valueTypeStack.push(ValueTypeV128) case OpcodeVecV128AnyTrue, OpcodeVecI8x16AllTrue, OpcodeVecI16x8AllTrue, OpcodeVecI32x4AllTrue, OpcodeVecI64x2AllTrue, OpcodeVecI8x16BitMask, OpcodeVecI16x8BitMask, OpcodeVecI32x4BitMask, OpcodeVecI64x2BitMask: if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { @@ -1309,7 +1301,53 @@ func (m *Module) validateFunctionWithMaxStackValues( OpcodeVecI64x2Eq, OpcodeVecI64x2Ne, OpcodeVecI64x2LtS, OpcodeVecI64x2GtS, OpcodeVecI64x2LeS, OpcodeVecI64x2GeS, OpcodeVecF32x4Eq, OpcodeVecF32x4Ne, OpcodeVecF32x4Lt, OpcodeVecF32x4Gt, OpcodeVecF32x4Le, OpcodeVecF32x4Ge, OpcodeVecF64x2Eq, OpcodeVecF64x2Ne, OpcodeVecF64x2Lt, - OpcodeVecF64x2Gt, OpcodeVecF64x2Le, OpcodeVecF64x2Ge: + OpcodeVecF64x2Gt, OpcodeVecF64x2Le, OpcodeVecF64x2Ge, + OpcodeVecI32x4DotI16x8S, + OpcodeVecI8x16NarrowI16x8S, OpcodeVecI8x16NarrowI16x8U, OpcodeVecI16x8NarrowI32x4S, OpcodeVecI16x8NarrowI32x4U: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecI8x16Neg, OpcodeVecI16x8Neg, OpcodeVecI32x4Neg, OpcodeVecI64x2Neg, OpcodeVecF32x4Neg, OpcodeVecF64x2Neg, + OpcodeVecF32x4Sqrt, OpcodeVecF64x2Sqrt, + OpcodeVecI8x16Abs, OpcodeVecI8x16Popcnt, OpcodeVecI16x8Abs, OpcodeVecI32x4Abs, OpcodeVecI64x2Abs, + OpcodeVecF32x4Abs, OpcodeVecF64x2Abs, + OpcodeVecF32x4Ceil, OpcodeVecF32x4Floor, OpcodeVecF32x4Trunc, OpcodeVecF32x4Nearest, + OpcodeVecF64x2Ceil, OpcodeVecF64x2Floor, OpcodeVecF64x2Trunc, OpcodeVecF64x2Nearest, + OpcodeVecI16x8ExtendLowI8x16S, OpcodeVecI16x8ExtendHighI8x16S, OpcodeVecI16x8ExtendLowI8x16U, OpcodeVecI16x8ExtendHighI8x16U, + OpcodeVecI32x4ExtendLowI16x8S, OpcodeVecI32x4ExtendHighI16x8S, OpcodeVecI32x4ExtendLowI16x8U, OpcodeVecI32x4ExtendHighI16x8U, + OpcodeVecI64x2ExtendLowI32x4S, OpcodeVecI64x2ExtendHighI32x4S, OpcodeVecI64x2ExtendLowI32x4U, OpcodeVecI64x2ExtendHighI32x4U, + OpcodeVecI16x8ExtaddPairwiseI8x16S, OpcodeVecI16x8ExtaddPairwiseI8x16U, + OpcodeVecI32x4ExtaddPairwiseI16x8S, 
OpcodeVecI32x4ExtaddPairwiseI16x8U, + OpcodeVecF64x2PromoteLowF32x4Zero, OpcodeVecF32x4DemoteF64x2Zero, + OpcodeVecF32x4ConvertI32x4S, OpcodeVecF32x4ConvertI32x4U, + OpcodeVecF64x2ConvertLowI32x4S, OpcodeVecF64x2ConvertLowI32x4U, + OpcodeVecI32x4TruncSatF32x4S, OpcodeVecI32x4TruncSatF32x4U, OpcodeVecI32x4TruncSatF64x2SZero, OpcodeVecI32x4TruncSatF64x2UZero: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + + case OpcodeVecI8x16Add, OpcodeVecI8x16AddSatS, OpcodeVecI8x16AddSatU, OpcodeVecI8x16Sub, OpcodeVecI8x16SubSatS, OpcodeVecI8x16SubSatU, + OpcodeVecI16x8Add, OpcodeVecI16x8AddSatS, OpcodeVecI16x8AddSatU, OpcodeVecI16x8Sub, OpcodeVecI16x8SubSatS, OpcodeVecI16x8SubSatU, OpcodeVecI16x8Mul, + OpcodeVecI32x4Add, OpcodeVecI32x4Sub, OpcodeVecI32x4Mul, + OpcodeVecI64x2Add, OpcodeVecI64x2Sub, OpcodeVecI64x2Mul, + OpcodeVecF32x4Add, OpcodeVecF32x4Sub, OpcodeVecF32x4Mul, OpcodeVecF32x4Div, + OpcodeVecF64x2Add, OpcodeVecF64x2Sub, OpcodeVecF64x2Mul, OpcodeVecF64x2Div, + OpcodeVecI8x16MinS, OpcodeVecI8x16MinU, OpcodeVecI8x16MaxS, OpcodeVecI8x16MaxU, + OpcodeVecI8x16AvgrU, + OpcodeVecI16x8MinS, OpcodeVecI16x8MinU, OpcodeVecI16x8MaxS, OpcodeVecI16x8MaxU, + OpcodeVecI16x8AvgrU, + OpcodeVecI32x4MinS, OpcodeVecI32x4MinU, OpcodeVecI32x4MaxS, OpcodeVecI32x4MaxU, + OpcodeVecF32x4Min, OpcodeVecF32x4Max, OpcodeVecF64x2Min, OpcodeVecF64x2Max, + OpcodeVecF32x4Pmin, OpcodeVecF32x4Pmax, OpcodeVecF64x2Pmin, OpcodeVecF64x2Pmax, + OpcodeVecI16x8Q15mulrSatS, + OpcodeVecI16x8ExtMulLowI8x16S, OpcodeVecI16x8ExtMulHighI8x16S, OpcodeVecI16x8ExtMulLowI8x16U, OpcodeVecI16x8ExtMulHighI8x16U, + OpcodeVecI32x4ExtMulLowI16x8S, OpcodeVecI32x4ExtMulHighI16x8S, OpcodeVecI32x4ExtMulLowI16x8U, OpcodeVecI32x4ExtMulHighI16x8U, + OpcodeVecI64x2ExtMulLowI32x4S, OpcodeVecI64x2ExtMulHighI32x4S, OpcodeVecI64x2ExtMulLowI32x4U, OpcodeVecI64x2ExtMulHighI32x4U: if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) } diff --git a/internal/wasm/func_validation_test.go b/internal/wasm/func_validation_test.go index d926bb77ea5..11e37a90599 100644 --- a/internal/wasm/func_validation_test.go +++ b/internal/wasm/func_validation_test.go @@ -3020,6 +3020,122 @@ func TestModule_funcValidation_SIMD(t *testing.T) { {name: OpcodeVecF64x2GtName, body: vv2v(OpcodeVecF64x2Gt)}, {name: OpcodeVecF64x2LeName, body: vv2v(OpcodeVecF64x2Le)}, {name: OpcodeVecF64x2GeName, body: vv2v(OpcodeVecF64x2Ge)}, + {name: OpcodeVecI8x16AddName, body: vv2v(OpcodeVecI8x16Add)}, + {name: OpcodeVecI8x16AddSatSName, body: vv2v(OpcodeVecI8x16AddSatS)}, + {name: OpcodeVecI8x16AddSatUName, body: vv2v(OpcodeVecI8x16AddSatU)}, + {name: OpcodeVecI8x16SubName, body: vv2v(OpcodeVecI8x16Sub)}, + {name: OpcodeVecI8x16SubSatSName, body: vv2v(OpcodeVecI8x16SubSatS)}, + {name: OpcodeVecI8x16SubSatUName, body: vv2v(OpcodeVecI8x16SubSatU)}, + {name: OpcodeVecI16x8AddName, body: vv2v(OpcodeVecI16x8Add)}, + {name: OpcodeVecI16x8AddSatSName, body: vv2v(OpcodeVecI16x8AddSatS)}, + {name: OpcodeVecI16x8AddSatUName, body: vv2v(OpcodeVecI16x8AddSatU)}, + {name: OpcodeVecI16x8SubName, body: vv2v(OpcodeVecI16x8Sub)}, + {name: OpcodeVecI16x8SubSatSName, body: vv2v(OpcodeVecI16x8SubSatS)}, + {name: OpcodeVecI16x8SubSatUName, body: vv2v(OpcodeVecI16x8SubSatU)}, + {name: OpcodeVecI16x8MulName, body: vv2v(OpcodeVecI16x8Mul)}, + {name: 
OpcodeVecI32x4AddName, body: vv2v(OpcodeVecI32x4Add)}, + {name: OpcodeVecI32x4SubName, body: vv2v(OpcodeVecI32x4Sub)}, + {name: OpcodeVecI32x4MulName, body: vv2v(OpcodeVecI32x4Mul)}, + {name: OpcodeVecI64x2AddName, body: vv2v(OpcodeVecI64x2Add)}, + {name: OpcodeVecI64x2SubName, body: vv2v(OpcodeVecI64x2Sub)}, + {name: OpcodeVecI64x2MulName, body: vv2v(OpcodeVecI64x2Mul)}, + {name: OpcodeVecF32x4AddName, body: vv2v(OpcodeVecF32x4Add)}, + {name: OpcodeVecF32x4SubName, body: vv2v(OpcodeVecF32x4Sub)}, + {name: OpcodeVecF32x4MulName, body: vv2v(OpcodeVecF32x4Mul)}, + {name: OpcodeVecF32x4DivName, body: vv2v(OpcodeVecF32x4Div)}, + {name: OpcodeVecF64x2AddName, body: vv2v(OpcodeVecF64x2Add)}, + {name: OpcodeVecF64x2SubName, body: vv2v(OpcodeVecF64x2Sub)}, + {name: OpcodeVecF64x2MulName, body: vv2v(OpcodeVecF64x2Mul)}, + {name: OpcodeVecF64x2DivName, body: vv2v(OpcodeVecF64x2Div)}, + {name: OpcodeVecI8x16NegName, body: v2v(OpcodeVecI8x16Neg)}, + {name: OpcodeVecI16x8NegName, body: v2v(OpcodeVecI16x8Neg)}, + {name: OpcodeVecI32x4NegName, body: v2v(OpcodeVecI32x4Neg)}, + {name: OpcodeVecI64x2NegName, body: v2v(OpcodeVecI64x2Neg)}, + {name: OpcodeVecF32x4NegName, body: v2v(OpcodeVecF32x4Neg)}, + {name: OpcodeVecF64x2NegName, body: v2v(OpcodeVecF64x2Neg)}, + {name: OpcodeVecF32x4SqrtName, body: v2v(OpcodeVecF32x4Sqrt)}, + {name: OpcodeVecF64x2SqrtName, body: v2v(OpcodeVecF64x2Sqrt)}, + {name: OpcodeVecI8x16MinSName, body: vv2v(OpcodeVecI8x16MinS)}, + {name: OpcodeVecI8x16MinUName, body: vv2v(OpcodeVecI8x16MinU)}, + {name: OpcodeVecI8x16MaxSName, body: vv2v(OpcodeVecI8x16MaxS)}, + {name: OpcodeVecI8x16MaxUName, body: vv2v(OpcodeVecI8x16MaxU)}, + {name: OpcodeVecI8x16AvgrUName, body: vv2v(OpcodeVecI8x16AvgrU)}, + {name: OpcodeVecI8x16AbsName, body: v2v(OpcodeVecI8x16Abs)}, + {name: OpcodeVecI8x16PopcntName, body: v2v(OpcodeVecI8x16Popcnt)}, + {name: OpcodeVecI16x8MinSName, body: vv2v(OpcodeVecI16x8MinS)}, + {name: OpcodeVecI16x8MinUName, body: vv2v(OpcodeVecI16x8MinU)}, + {name: OpcodeVecI16x8MaxSName, body: vv2v(OpcodeVecI16x8MaxS)}, + {name: OpcodeVecI16x8MaxUName, body: vv2v(OpcodeVecI16x8MaxU)}, + {name: OpcodeVecI16x8AvgrUName, body: vv2v(OpcodeVecI16x8AvgrU)}, + {name: OpcodeVecI16x8AbsName, body: v2v(OpcodeVecI16x8Abs)}, + {name: OpcodeVecI32x4MinSName, body: vv2v(OpcodeVecI32x4MinS)}, + {name: OpcodeVecI32x4MinUName, body: vv2v(OpcodeVecI32x4MinU)}, + {name: OpcodeVecI32x4MaxSName, body: vv2v(OpcodeVecI32x4MaxS)}, + {name: OpcodeVecI32x4MaxUName, body: vv2v(OpcodeVecI32x4MaxU)}, + {name: OpcodeVecI32x4AbsName, body: v2v(OpcodeVecI32x4Abs)}, + {name: OpcodeVecI64x2AbsName, body: v2v(OpcodeVecI64x2Abs)}, + {name: OpcodeVecF32x4AbsName, body: v2v(OpcodeVecF32x4Abs)}, + {name: OpcodeVecF64x2AbsName, body: v2v(OpcodeVecF64x2Abs)}, + {name: OpcodeVecF32x4MinName, body: vv2v(OpcodeVecF32x4Min)}, + {name: OpcodeVecF32x4MaxName, body: vv2v(OpcodeVecF32x4Max)}, + {name: OpcodeVecF64x2MinName, body: vv2v(OpcodeVecF64x2Min)}, + {name: OpcodeVecF64x2MaxName, body: vv2v(OpcodeVecF64x2Max)}, + {name: OpcodeVecF32x4CeilName, body: v2v(OpcodeVecF32x4Ceil)}, + {name: OpcodeVecF32x4FloorName, body: v2v(OpcodeVecF32x4Floor)}, + {name: OpcodeVecF32x4TruncName, body: v2v(OpcodeVecF32x4Trunc)}, + {name: OpcodeVecF32x4NearestName, body: v2v(OpcodeVecF32x4Nearest)}, + {name: OpcodeVecF64x2CeilName, body: v2v(OpcodeVecF64x2Ceil)}, + {name: OpcodeVecF64x2FloorName, body: v2v(OpcodeVecF64x2Floor)}, + {name: OpcodeVecF64x2TruncName, body: v2v(OpcodeVecF64x2Trunc)}, + {name: OpcodeVecF64x2NearestName, body: 
v2v(OpcodeVecF64x2Nearest)}, + {name: OpcodeVecF32x4PminName, body: vv2v(OpcodeVecF32x4Pmin)}, + {name: OpcodeVecF32x4PmaxName, body: vv2v(OpcodeVecF32x4Pmax)}, + {name: OpcodeVecF64x2PminName, body: vv2v(OpcodeVecF64x2Pmin)}, + {name: OpcodeVecF64x2PmaxName, body: vv2v(OpcodeVecF64x2Pmax)}, + {name: OpcodeVecI16x8ExtendLowI8x16SName, body: v2v(OpcodeVecI16x8ExtendLowI8x16S)}, + {name: OpcodeVecI16x8ExtendHighI8x16SName, body: v2v(OpcodeVecI16x8ExtendHighI8x16S)}, + {name: OpcodeVecI16x8ExtendLowI8x16UName, body: v2v(OpcodeVecI16x8ExtendLowI8x16U)}, + {name: OpcodeVecI16x8ExtendHighI8x16UName, body: v2v(OpcodeVecI16x8ExtendHighI8x16U)}, + {name: OpcodeVecI32x4ExtendLowI16x8SName, body: v2v(OpcodeVecI32x4ExtendLowI16x8S)}, + {name: OpcodeVecI32x4ExtendHighI16x8SName, body: v2v(OpcodeVecI32x4ExtendHighI16x8S)}, + {name: OpcodeVecI32x4ExtendLowI16x8UName, body: v2v(OpcodeVecI32x4ExtendLowI16x8U)}, + {name: OpcodeVecI32x4ExtendHighI16x8UName, body: v2v(OpcodeVecI32x4ExtendHighI16x8U)}, + {name: OpcodeVecI64x2ExtendLowI32x4SName, body: v2v(OpcodeVecI64x2ExtendLowI32x4S)}, + {name: OpcodeVecI64x2ExtendHighI32x4SName, body: v2v(OpcodeVecI64x2ExtendHighI32x4S)}, + {name: OpcodeVecI64x2ExtendLowI32x4UName, body: v2v(OpcodeVecI64x2ExtendLowI32x4U)}, + {name: OpcodeVecI64x2ExtendHighI32x4UName, body: v2v(OpcodeVecI64x2ExtendHighI32x4U)}, + {name: OpcodeVecI16x8Q15mulrSatSName, body: vv2v(OpcodeVecI16x8Q15mulrSatS)}, + {name: OpcodeVecI16x8ExtMulLowI8x16SName, body: vv2v(OpcodeVecI16x8ExtMulLowI8x16S)}, + {name: OpcodeVecI16x8ExtMulHighI8x16SName, body: vv2v(OpcodeVecI16x8ExtMulHighI8x16S)}, + {name: OpcodeVecI16x8ExtMulLowI8x16UName, body: vv2v(OpcodeVecI16x8ExtMulLowI8x16U)}, + {name: OpcodeVecI16x8ExtMulHighI8x16UName, body: vv2v(OpcodeVecI16x8ExtMulHighI8x16U)}, + {name: OpcodeVecI32x4ExtMulLowI16x8SName, body: vv2v(OpcodeVecI32x4ExtMulLowI16x8S)}, + {name: OpcodeVecI32x4ExtMulHighI16x8SName, body: vv2v(OpcodeVecI32x4ExtMulHighI16x8S)}, + {name: OpcodeVecI32x4ExtMulLowI16x8UName, body: vv2v(OpcodeVecI32x4ExtMulLowI16x8U)}, + {name: OpcodeVecI32x4ExtMulHighI16x8UName, body: vv2v(OpcodeVecI32x4ExtMulHighI16x8U)}, + {name: OpcodeVecI64x2ExtMulLowI32x4SName, body: vv2v(OpcodeVecI64x2ExtMulLowI32x4S)}, + {name: OpcodeVecI64x2ExtMulHighI32x4SName, body: vv2v(OpcodeVecI64x2ExtMulHighI32x4S)}, + {name: OpcodeVecI64x2ExtMulLowI32x4UName, body: vv2v(OpcodeVecI64x2ExtMulLowI32x4U)}, + {name: OpcodeVecI64x2ExtMulHighI32x4UName, body: vv2v(OpcodeVecI64x2ExtMulHighI32x4U)}, + {name: OpcodeVecI16x8ExtaddPairwiseI8x16SName, body: v2v(OpcodeVecI16x8ExtaddPairwiseI8x16S)}, + {name: OpcodeVecI16x8ExtaddPairwiseI8x16UName, body: v2v(OpcodeVecI16x8ExtaddPairwiseI8x16U)}, + {name: OpcodeVecI32x4ExtaddPairwiseI16x8SName, body: v2v(OpcodeVecI32x4ExtaddPairwiseI16x8S)}, + {name: OpcodeVecI32x4ExtaddPairwiseI16x8UName, body: v2v(OpcodeVecI32x4ExtaddPairwiseI16x8U)}, + {name: OpcodeVecF64x2PromoteLowF32x4ZeroName, body: v2v(OpcodeVecF64x2PromoteLowF32x4Zero)}, + {name: OpcodeVecF32x4DemoteF64x2ZeroName, body: v2v(OpcodeVecF32x4DemoteF64x2Zero)}, + {name: OpcodeVecF32x4ConvertI32x4SName, body: v2v(OpcodeVecF32x4ConvertI32x4S)}, + {name: OpcodeVecF32x4ConvertI32x4UName, body: v2v(OpcodeVecF32x4ConvertI32x4U)}, + {name: OpcodeVecF64x2ConvertLowI32x4SName, body: v2v(OpcodeVecF64x2ConvertLowI32x4S)}, + {name: OpcodeVecF64x2ConvertLowI32x4UName, body: v2v(OpcodeVecF64x2ConvertLowI32x4U)}, + {name: OpcodeVecI32x4DotI16x8SName, body: vv2v(OpcodeVecI32x4DotI16x8S)}, + {name: OpcodeVecI8x16NarrowI16x8SName, body: 
vv2v(OpcodeVecI8x16NarrowI16x8S)}, + {name: OpcodeVecI8x16NarrowI16x8UName, body: vv2v(OpcodeVecI8x16NarrowI16x8U)}, + {name: OpcodeVecI16x8NarrowI32x4SName, body: vv2v(OpcodeVecI16x8NarrowI32x4S)}, + {name: OpcodeVecI16x8NarrowI32x4UName, body: vv2v(OpcodeVecI16x8NarrowI32x4U)}, + {name: OpcodeVecI32x4TruncSatF32x4SName, body: v2v(OpcodeVecI32x4TruncSatF32x4S)}, + {name: OpcodeVecI32x4TruncSatF32x4UName, body: v2v(OpcodeVecI32x4TruncSatF32x4U)}, + {name: OpcodeVecI32x4TruncSatF64x2SZeroName, body: v2v(OpcodeVecI32x4TruncSatF64x2SZero)}, + {name: OpcodeVecI32x4TruncSatF64x2UZeroName, body: v2v(OpcodeVecI32x4TruncSatF64x2UZero)}, } for _, tt := range tests { @@ -3115,16 +3231,6 @@ func TestModule_funcValidation_SIMD_error(t *testing.T) { }, expectedErr: "invalid lane index[0] 255 >= 32 for v128.shuffle", }, - { - // TODO delete this case after SIMD impl completion. - name: "unimplemented", - body: []byte{ - OpcodeVecPrefix, - OpcodeVecF32x4DemoteF64x2Zero, - }, - flag: FeatureSIMD, - expectedErr: "TODO: SIMD instruction f32x4.demote_f64x2_zero will be implemented in #506", - }, } addExtractOrReplaceLaneOutOfIndexCase := func(op OpcodeVec, lane, laneCeil byte) { diff --git a/internal/wasm/instruction.go b/internal/wasm/instruction.go index 8cab0bd2d68..857633ffcb4 100644 --- a/internal/wasm/instruction.go +++ b/internal/wasm/instruction.go @@ -218,7 +218,7 @@ const ( OpcodeI64TruncF64S Opcode = 0xb0 OpcodeI64TruncF64U Opcode = 0xb1 - OpcodeF32ConvertI32s Opcode = 0xb2 + OpcodeF32ConvertI32S Opcode = 0xb2 OpcodeF32ConvertI32U Opcode = 0xb3 OpcodeF32ConvertI64S Opcode = 0xb4 OpcodeF32ConvertI64U Opcode = 0xb5 @@ -487,7 +487,7 @@ const ( OpcodeVecI8x16MinU OpcodeVec = 0x77 OpcodeVecI8x16MaxS OpcodeVec = 0x78 OpcodeVecI8x16MaxU OpcodeVec = 0x79 - OpcodeVecI8x16ArgrU OpcodeVec = 0x7b + OpcodeVecI8x16AvgrU OpcodeVec = 0x7b // i16 misc. 
@@ -495,7 +495,7 @@ const ( OpcodeVecI16x8ExtaddPairwiseI8x16U OpcodeVec = 0x7d OpcodeVecI16x8Abs OpcodeVec = 0x80 OpcodeVecI16x8Neg OpcodeVec = 0x81 - OpcodeVecI16x8Q16mulrSatS OpcodeVec = 0x82 + OpcodeVecI16x8Q15mulrSatS OpcodeVec = 0x82 OpcodeVecI16x8AllTrue OpcodeVec = 0x83 OpcodeVecI16x8BitMask OpcodeVec = 0x84 OpcodeVecI16x8NarrowI32x4S OpcodeVec = 0x85 @@ -518,7 +518,7 @@ const ( OpcodeVecI16x8MinU OpcodeVec = 0x97 OpcodeVecI16x8MaxS OpcodeVec = 0x98 OpcodeVecI16x8MaxU OpcodeVec = 0x99 - OpcodeVecI16x8ArgrU OpcodeVec = 0x9b + OpcodeVecI16x8AvgrU OpcodeVec = 0x9b OpcodeVecI16x8ExtMulLowI8x16S OpcodeVec = 0x9c OpcodeVecI16x8ExtMulHighI8x16S OpcodeVec = 0x9d OpcodeVecI16x8ExtMulLowI8x16U OpcodeVec = 0x9e @@ -617,8 +617,8 @@ const ( OpcodeVecF32x4ConvertI32x4U OpcodeVec = 0xfb OpcodeVecI32x4TruncSatF64x2SZero OpcodeVec = 0xfc OpcodeVecI32x4TruncSatF64x2UZero OpcodeVec = 0xfd - OpcodeVecF64x2ConvertI32x4S OpcodeVec = 0xfe - OpcodeVecF64x2ConvertI32x4U OpcodeVec = 0xff + OpcodeVecF64x2ConvertLowI32x4S OpcodeVec = 0xfe + OpcodeVecF64x2ConvertLowI32x4U OpcodeVec = 0xff OpcodeVecF32x4DemoteF64x2Zero OpcodeVec = 0x5e OpcodeVecF64x2PromoteLowF32x4Zero OpcodeVec = 0x5f ) @@ -783,7 +783,7 @@ const ( OpcodeI64TruncF32UName = "i64.trunc_f32_u" OpcodeI64TruncF64SName = "i64.trunc_f64_s" OpcodeI64TruncF64UName = "i64.trunc_f64_u" - OpcodeF32ConvertI32sName = "f32.convert_i32_s" + OpcodeF32ConvertI32SName = "f32.convert_i32_s" OpcodeF32ConvertI32UName = "f32.convert_i32_u" OpcodeF32ConvertI64SName = "f32.convert_i64_s" OpcodeF32ConvertI64UName = "f32.convert_i64u" @@ -977,7 +977,7 @@ var instructionNames = [256]string{ OpcodeI64TruncF32U: OpcodeI64TruncF32UName, OpcodeI64TruncF64S: OpcodeI64TruncF64SName, OpcodeI64TruncF64U: OpcodeI64TruncF64UName, - OpcodeF32ConvertI32s: OpcodeF32ConvertI32sName, + OpcodeF32ConvertI32S: OpcodeF32ConvertI32SName, OpcodeF32ConvertI32U: OpcodeF32ConvertI32UName, OpcodeF32ConvertI64S: OpcodeF32ConvertI64SName, OpcodeF32ConvertI64U: OpcodeF32ConvertI64UName, @@ -1187,12 +1187,12 @@ const ( OpcodeVecI8x16MinUName = "i8x16.min_u" OpcodeVecI8x16MaxSName = "i8x16.max_s" OpcodeVecI8x16MaxUName = "i8x16.max_u" - OpcodeVecI8x16ArgrUName = "i8x16.argr_u" + OpcodeVecI8x16AvgrUName = "i8x16.avgr_u" OpcodeVecI16x8ExtaddPairwiseI8x16SName = "i16x8.extadd_pairwise_i8x16_s" OpcodeVecI16x8ExtaddPairwiseI8x16UName = "i16x8.extadd_pairwise_i8x16_u" OpcodeVecI16x8AbsName = "i16x8.abs" OpcodeVecI16x8NegName = "i16x8.neg" - OpcodeVecI16x8Q16mulrSatSName = "i16x8.q15mulr_sat_s" + OpcodeVecI16x8Q15mulrSatSName = "i16x8.q15mulr_sat_s" OpcodeVecI16x8AllTrueName = "i16x8.all_true" OpcodeVecI16x8BitMaskName = "i16x8.bitmask" OpcodeVecI16x8NarrowI32x4SName = "i16x8.narrow_i32x4_s" @@ -1215,7 +1215,7 @@ const ( OpcodeVecI16x8MinUName = "i16x8.min_u" OpcodeVecI16x8MaxSName = "i16x8.max_s" OpcodeVecI16x8MaxUName = "i16x8.max_u" - OpcodeVecI16x8ArgrUName = "i16x8.argr_u" + OpcodeVecI16x8AvgrUName = "i16x8.avgr_u" OpcodeVecI16x8ExtMulLowI8x16SName = "i16x8.extmul_low_i8x16_s" OpcodeVecI16x8ExtMulHighI8x16SName = "i16x8.extmul_high_i8x16_s" OpcodeVecI16x8ExtMulLowI8x16UName = "i16x8.extmul_low_i8x16_u" @@ -1299,8 +1299,8 @@ const ( OpcodeVecF32x4ConvertI32x4UName = "f32x4.convert_i32x4_u" OpcodeVecI32x4TruncSatF64x2SZeroName = "i32x4.trunc_sat_f64x2_s_zero" OpcodeVecI32x4TruncSatF64x2UZeroName = "i32x4.trunc_sat_f64x2_u_zero" - OpcodeVecF64x2ConvertI32x4SName = "f64x2.convert_low_i32x4_s" - OpcodeVecF64x2ConvertI32x4UName = "f64x2.convert_low_i32x4_u" + OpcodeVecF64x2ConvertLowI32x4SName = 
"f64x2.convert_low_i32x4_s" + OpcodeVecF64x2ConvertLowI32x4UName = "f64x2.convert_low_i32x4_u" OpcodeVecF32x4DemoteF64x2ZeroName = "f32x4.demote_f64x2_zero" OpcodeVecF64x2PromoteLowF32x4ZeroName = "f64x2.promote_low_f32x4" ) @@ -1426,12 +1426,12 @@ var vectorInstructionName = map[OpcodeVec]string{ OpcodeVecI8x16MinU: OpcodeVecI8x16MinUName, OpcodeVecI8x16MaxS: OpcodeVecI8x16MaxSName, OpcodeVecI8x16MaxU: OpcodeVecI8x16MaxUName, - OpcodeVecI8x16ArgrU: OpcodeVecI8x16ArgrUName, + OpcodeVecI8x16AvgrU: OpcodeVecI8x16AvgrUName, OpcodeVecI16x8ExtaddPairwiseI8x16S: OpcodeVecI16x8ExtaddPairwiseI8x16SName, OpcodeVecI16x8ExtaddPairwiseI8x16U: OpcodeVecI16x8ExtaddPairwiseI8x16UName, OpcodeVecI16x8Abs: OpcodeVecI16x8AbsName, OpcodeVecI16x8Neg: OpcodeVecI16x8NegName, - OpcodeVecI16x8Q16mulrSatS: OpcodeVecI16x8Q16mulrSatSName, + OpcodeVecI16x8Q15mulrSatS: OpcodeVecI16x8Q15mulrSatSName, OpcodeVecI16x8AllTrue: OpcodeVecI16x8AllTrueName, OpcodeVecI16x8BitMask: OpcodeVecI16x8BitMaskName, OpcodeVecI16x8NarrowI32x4S: OpcodeVecI16x8NarrowI32x4SName, @@ -1454,7 +1454,7 @@ var vectorInstructionName = map[OpcodeVec]string{ OpcodeVecI16x8MinU: OpcodeVecI16x8MinUName, OpcodeVecI16x8MaxS: OpcodeVecI16x8MaxSName, OpcodeVecI16x8MaxU: OpcodeVecI16x8MaxUName, - OpcodeVecI16x8ArgrU: OpcodeVecI16x8ArgrUName, + OpcodeVecI16x8AvgrU: OpcodeVecI16x8AvgrUName, OpcodeVecI16x8ExtMulLowI8x16S: OpcodeVecI16x8ExtMulLowI8x16SName, OpcodeVecI16x8ExtMulHighI8x16S: OpcodeVecI16x8ExtMulHighI8x16SName, OpcodeVecI16x8ExtMulLowI8x16U: OpcodeVecI16x8ExtMulLowI8x16UName, @@ -1538,8 +1538,8 @@ var vectorInstructionName = map[OpcodeVec]string{ OpcodeVecF32x4ConvertI32x4U: OpcodeVecF32x4ConvertI32x4UName, OpcodeVecI32x4TruncSatF64x2SZero: OpcodeVecI32x4TruncSatF64x2SZeroName, OpcodeVecI32x4TruncSatF64x2UZero: OpcodeVecI32x4TruncSatF64x2UZeroName, - OpcodeVecF64x2ConvertI32x4S: OpcodeVecF64x2ConvertI32x4SName, - OpcodeVecF64x2ConvertI32x4U: OpcodeVecF64x2ConvertI32x4UName, + OpcodeVecF64x2ConvertLowI32x4S: OpcodeVecF64x2ConvertLowI32x4SName, + OpcodeVecF64x2ConvertLowI32x4U: OpcodeVecF64x2ConvertLowI32x4UName, OpcodeVecF32x4DemoteF64x2Zero: OpcodeVecF32x4DemoteF64x2ZeroName, OpcodeVecF64x2PromoteLowF32x4Zero: OpcodeVecF64x2PromoteLowF32x4ZeroName, } diff --git a/internal/wasm/module.go b/internal/wasm/module.go index cac6207f7cf..c5b4be9ffd4 100644 --- a/internal/wasm/module.go +++ b/internal/wasm/module.go @@ -1006,7 +1006,7 @@ const ( ValueTypeF32 = api.ValueTypeF32 ValueTypeF64 = api.ValueTypeF64 // TODO: ValueTypeV128 is not exposed in the api pkg yet. - ValueTypeV128 = 0x7b + ValueTypeV128 ValueType = 0x7b // TODO: ValueTypeFuncref is not exposed in the api pkg yet. 
ValueTypeFuncref ValueType = 0x70 ValueTypeExternref = api.ValueTypeExternref diff --git a/internal/wazeroir/compiler.go b/internal/wazeroir/compiler.go index 0aa1a81778e..59b08350517 100644 --- a/internal/wazeroir/compiler.go +++ b/internal/wazeroir/compiler.go @@ -947,7 +947,7 @@ operatorSwitch: return err } c.emit( - &OperationStore8{Type: UnsignedInt32, Arg: imm}, + &OperationStore8{Arg: imm}, ) case wasm.OpcodeI32Store16: imm, err := c.readMemoryArg(wasm.OpcodeI32Store16Name) @@ -955,7 +955,7 @@ operatorSwitch: return err } c.emit( - &OperationStore16{Type: UnsignedInt32, Arg: imm}, + &OperationStore16{Arg: imm}, ) case wasm.OpcodeI64Store8: imm, err := c.readMemoryArg(wasm.OpcodeI64Store8Name) @@ -963,7 +963,7 @@ operatorSwitch: return err } c.emit( - &OperationStore8{Type: UnsignedInt64, Arg: imm}, + &OperationStore8{Arg: imm}, ) case wasm.OpcodeI64Store16: imm, err := c.readMemoryArg(wasm.OpcodeI64Store16Name) @@ -971,7 +971,7 @@ operatorSwitch: return err } c.emit( - &OperationStore16{Type: UnsignedInt64, Arg: imm}, + &OperationStore16{Arg: imm}, ) case wasm.OpcodeI64Store32: imm, err := c.readMemoryArg(wasm.OpcodeI64Store32Name) @@ -1457,7 +1457,7 @@ operatorSwitch: c.emit( &OperationITruncFromF{InputType: Float64, OutputType: SignedUint64}, ) - case wasm.OpcodeF32ConvertI32s: + case wasm.OpcodeF32ConvertI32S: c.emit( &OperationFConvertFromI{InputType: SignedInt32, OutputType: Float32}, ) @@ -1730,45 +1730,13 @@ operatorSwitch: &OperationV128Const{Lo: lo, Hi: hi}, ) c.pc += 7 - case wasm.OpcodeVecI8x16Add: - c.emit( - &OperationV128Add{Shape: ShapeI8x16}, - ) - case wasm.OpcodeVecI16x8Add: - c.emit( - &OperationV128Add{Shape: ShapeI16x8}, - ) - case wasm.OpcodeVecI32x4Add: - c.emit( - &OperationV128Add{Shape: ShapeI32x4}, - ) - case wasm.OpcodeVecI64x2Add: - c.emit( - &OperationV128Add{Shape: ShapeI64x2}, - ) - case wasm.OpcodeVecI8x16Sub: - c.emit( - &OperationV128Sub{Shape: ShapeI8x16}, - ) - case wasm.OpcodeVecI16x8Sub: - c.emit( - &OperationV128Sub{Shape: ShapeI16x8}, - ) - case wasm.OpcodeVecI32x4Sub: - c.emit( - &OperationV128Sub{Shape: ShapeI32x4}, - ) - case wasm.OpcodeVecI64x2Sub: - c.emit( - &OperationV128Sub{Shape: ShapeI64x2}, - ) case wasm.OpcodeVecV128Load: arg, err := c.readMemoryArg(wasm.OpcodeI32LoadName) if err != nil { return err } c.emit( - &OperationV128Load{Type: LoadV128Type128, Arg: arg}, + &OperationV128Load{Type: V128LoadType128, Arg: arg}, ) case wasm.OpcodeVecV128Load8x8s: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8x8SName) @@ -1776,7 +1744,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type8x8s, Arg: arg}, + &OperationV128Load{Type: V128LoadType8x8s, Arg: arg}, ) case wasm.OpcodeVecV128Load8x8u: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8x8UName) @@ -1784,7 +1752,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type8x8u, Arg: arg}, + &OperationV128Load{Type: V128LoadType8x8u, Arg: arg}, ) case wasm.OpcodeVecV128Load16x4s: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16x4SName) @@ -1792,7 +1760,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type16x4s, Arg: arg}, + &OperationV128Load{Type: V128LoadType16x4s, Arg: arg}, ) case wasm.OpcodeVecV128Load16x4u: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16x4UName) @@ -1800,7 +1768,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type16x4u, Arg: arg}, + &OperationV128Load{Type: V128LoadType16x4u, Arg: arg}, ) case wasm.OpcodeVecV128Load32x2s: arg, err := 
c.readMemoryArg(wasm.OpcodeVecV128Load32x2SName) @@ -1808,7 +1776,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type32x2s, Arg: arg}, + &OperationV128Load{Type: V128LoadType32x2s, Arg: arg}, ) case wasm.OpcodeVecV128Load32x2u: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32x2UName) @@ -1816,7 +1784,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type32x2u, Arg: arg}, + &OperationV128Load{Type: V128LoadType32x2u, Arg: arg}, ) case wasm.OpcodeVecV128Load8Splat: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8SplatName) @@ -1824,7 +1792,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type8Splat, Arg: arg}, + &OperationV128Load{Type: V128LoadType8Splat, Arg: arg}, ) case wasm.OpcodeVecV128Load16Splat: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16SplatName) @@ -1832,7 +1800,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type16Splat, Arg: arg}, + &OperationV128Load{Type: V128LoadType16Splat, Arg: arg}, ) case wasm.OpcodeVecV128Load32Splat: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32SplatName) @@ -1840,7 +1808,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type32Splat, Arg: arg}, + &OperationV128Load{Type: V128LoadType32Splat, Arg: arg}, ) case wasm.OpcodeVecV128Load64Splat: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64SplatName) @@ -1848,7 +1816,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type64Splat, Arg: arg}, + &OperationV128Load{Type: V128LoadType64Splat, Arg: arg}, ) case wasm.OpcodeVecV128Load32zero: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32zeroName) @@ -1856,7 +1824,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type32zero, Arg: arg}, + &OperationV128Load{Type: V128LoadType32zero, Arg: arg}, ) case wasm.OpcodeVecV128Load64zero: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64zeroName) @@ -1864,7 +1832,7 @@ operatorSwitch: return err } c.emit( - &OperationV128Load{Type: LoadV128Type64zero, Arg: arg}, + &OperationV128Load{Type: V128LoadType64zero, Arg: arg}, ) case wasm.OpcodeVecV128Load8Lane: arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8LaneName) @@ -2372,6 +2340,470 @@ operatorSwitch: c.emit( &OperationV128Cmp{Type: V128CmpTypeF64x2Ge}, ) + case wasm.OpcodeVecI8x16Neg: + c.emit( + &OperationV128Neg{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI16x8Neg: + c.emit( + &OperationV128Neg{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI32x4Neg: + c.emit( + &OperationV128Neg{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecI64x2Neg: + c.emit( + &OperationV128Neg{Shape: ShapeI64x2}, + ) + case wasm.OpcodeVecF32x4Neg: + c.emit( + &OperationV128Neg{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Neg: + c.emit( + &OperationV128Neg{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecI8x16Add: + c.emit( + &OperationV128Add{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI16x8Add: + c.emit( + &OperationV128Add{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI32x4Add: + c.emit( + &OperationV128Add{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecI64x2Add: + c.emit( + &OperationV128Add{Shape: ShapeI64x2}, + ) + case wasm.OpcodeVecF32x4Add: + c.emit( + &OperationV128Add{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Add: + c.emit( + &OperationV128Add{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecI8x16Sub: + c.emit( + &OperationV128Sub{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI16x8Sub: + c.emit( + &OperationV128Sub{Shape: ShapeI16x8}, + ) + case 
wasm.OpcodeVecI32x4Sub: + c.emit( + &OperationV128Sub{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecI64x2Sub: + c.emit( + &OperationV128Sub{Shape: ShapeI64x2}, + ) + case wasm.OpcodeVecF32x4Sub: + c.emit( + &OperationV128Sub{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Sub: + c.emit( + &OperationV128Sub{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecI8x16AddSatS: + c.emit( + &OperationV128AddSat{Shape: ShapeI8x16, Signed: true}, + ) + case wasm.OpcodeVecI8x16AddSatU: + c.emit( + &OperationV128AddSat{Shape: ShapeI8x16, Signed: false}, + ) + case wasm.OpcodeVecI16x8AddSatS: + c.emit( + &OperationV128AddSat{Shape: ShapeI16x8, Signed: true}, + ) + case wasm.OpcodeVecI16x8AddSatU: + c.emit( + &OperationV128AddSat{Shape: ShapeI16x8, Signed: false}, + ) + case wasm.OpcodeVecI8x16SubSatS: + c.emit( + &OperationV128SubSat{Shape: ShapeI8x16, Signed: true}, + ) + case wasm.OpcodeVecI8x16SubSatU: + c.emit( + &OperationV128SubSat{Shape: ShapeI8x16, Signed: false}, + ) + case wasm.OpcodeVecI16x8SubSatS: + c.emit( + &OperationV128SubSat{Shape: ShapeI16x8, Signed: true}, + ) + case wasm.OpcodeVecI16x8SubSatU: + c.emit( + &OperationV128SubSat{Shape: ShapeI16x8, Signed: false}, + ) + case wasm.OpcodeVecI16x8Mul: + c.emit( + &OperationV128Mul{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI32x4Mul: + c.emit( + &OperationV128Mul{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecI64x2Mul: + c.emit( + &OperationV128Mul{Shape: ShapeI64x2}, + ) + case wasm.OpcodeVecF32x4Mul: + c.emit( + &OperationV128Mul{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Mul: + c.emit( + &OperationV128Mul{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF32x4Sqrt: + c.emit( + &OperationV128Sqrt{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Sqrt: + c.emit( + &OperationV128Sqrt{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF32x4Div: + c.emit( + &OperationV128Div{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Div: + c.emit( + &OperationV128Div{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecI8x16Abs: + c.emit( + &OperationV128Abs{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI8x16Popcnt: + c.emit( + &OperationV128Popcnt{}, + ) + case wasm.OpcodeVecI16x8Abs: + c.emit( + &OperationV128Abs{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI32x4Abs: + c.emit( + &OperationV128Abs{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecI64x2Abs: + c.emit( + &OperationV128Abs{Shape: ShapeI64x2}, + ) + case wasm.OpcodeVecF32x4Abs: + c.emit( + &OperationV128Abs{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Abs: + c.emit( + &OperationV128Abs{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecI8x16MinS: + c.emit( + &OperationV128Min{Signed: true, Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI8x16MinU: + c.emit( + &OperationV128Min{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI8x16MaxS: + c.emit( + &OperationV128Max{Shape: ShapeI8x16, Signed: true}, + ) + case wasm.OpcodeVecI8x16MaxU: + c.emit( + &OperationV128Max{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI8x16AvgrU: + c.emit( + &OperationV128AvgrU{Shape: ShapeI8x16}, + ) + case wasm.OpcodeVecI16x8MinS: + c.emit( + &OperationV128Min{Signed: true, Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI16x8MinU: + c.emit( + &OperationV128Min{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI16x8MaxS: + c.emit( + &OperationV128Max{Shape: ShapeI16x8, Signed: true}, + ) + case wasm.OpcodeVecI16x8MaxU: + c.emit( + &OperationV128Max{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI16x8AvgrU: + c.emit( + &OperationV128AvgrU{Shape: ShapeI16x8}, + ) + case wasm.OpcodeVecI32x4MinS: + c.emit( + &OperationV128Min{Signed: true, Shape: 
ShapeI32x4}, + ) + case wasm.OpcodeVecI32x4MinU: + c.emit( + &OperationV128Min{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecI32x4MaxS: + c.emit( + &OperationV128Max{Shape: ShapeI32x4, Signed: true}, + ) + case wasm.OpcodeVecI32x4MaxU: + c.emit( + &OperationV128Max{Shape: ShapeI32x4}, + ) + case wasm.OpcodeVecF32x4Min: + c.emit( + &OperationV128Min{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF32x4Max: + c.emit( + &OperationV128Max{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Min: + c.emit( + &OperationV128Min{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF64x2Max: + c.emit( + &OperationV128Max{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF32x4Pmin: + c.emit( + &OperationV128Pmin{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF32x4Pmax: + c.emit( + &OperationV128Pmax{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Pmin: + c.emit( + &OperationV128Pmin{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF64x2Pmax: + c.emit( + &OperationV128Pmax{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF32x4Ceil: + c.emit( + &OperationV128Ceil{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF32x4Floor: + c.emit( + &OperationV128Floor{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF32x4Trunc: + c.emit( + &OperationV128Trunc{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF32x4Nearest: + c.emit( + &OperationV128Nearest{Shape: ShapeF32x4}, + ) + case wasm.OpcodeVecF64x2Ceil: + c.emit( + &OperationV128Ceil{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF64x2Floor: + c.emit( + &OperationV128Floor{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF64x2Trunc: + c.emit( + &OperationV128Trunc{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecF64x2Nearest: + c.emit( + &OperationV128Nearest{Shape: ShapeF64x2}, + ) + case wasm.OpcodeVecI16x8ExtendLowI8x16S: + c.emit( + &OperationV128Extend{OriginShape: ShapeI8x16, Signed: true, UseLow: true}, + ) + case wasm.OpcodeVecI16x8ExtendHighI8x16S: + c.emit( + &OperationV128Extend{OriginShape: ShapeI8x16, Signed: true, UseLow: false}, + ) + case wasm.OpcodeVecI16x8ExtendLowI8x16U: + c.emit( + &OperationV128Extend{OriginShape: ShapeI8x16, Signed: false, UseLow: true}, + ) + case wasm.OpcodeVecI16x8ExtendHighI8x16U: + c.emit( + &OperationV128Extend{OriginShape: ShapeI8x16, Signed: false, UseLow: false}, + ) + case wasm.OpcodeVecI32x4ExtendLowI16x8S: + c.emit( + &OperationV128Extend{OriginShape: ShapeI16x8, Signed: true, UseLow: true}, + ) + case wasm.OpcodeVecI32x4ExtendHighI16x8S: + c.emit( + &OperationV128Extend{OriginShape: ShapeI16x8, Signed: true, UseLow: false}, + ) + case wasm.OpcodeVecI32x4ExtendLowI16x8U: + c.emit( + &OperationV128Extend{OriginShape: ShapeI16x8, Signed: false, UseLow: true}, + ) + case wasm.OpcodeVecI32x4ExtendHighI16x8U: + c.emit( + &OperationV128Extend{OriginShape: ShapeI16x8, Signed: false, UseLow: false}, + ) + case wasm.OpcodeVecI64x2ExtendLowI32x4S: + c.emit( + &OperationV128Extend{OriginShape: ShapeI32x4, Signed: true, UseLow: true}, + ) + case wasm.OpcodeVecI64x2ExtendHighI32x4S: + c.emit( + &OperationV128Extend{OriginShape: ShapeI32x4, Signed: true, UseLow: false}, + ) + case wasm.OpcodeVecI64x2ExtendLowI32x4U: + c.emit( + &OperationV128Extend{OriginShape: ShapeI32x4, Signed: false, UseLow: true}, + ) + case wasm.OpcodeVecI64x2ExtendHighI32x4U: + c.emit( + &OperationV128Extend{OriginShape: ShapeI32x4, Signed: false, UseLow: false}, + ) + case wasm.OpcodeVecI16x8Q15mulrSatS: + c.emit( + &OperationV128Q15mulrSatS{}, + ) + case wasm.OpcodeVecI16x8ExtMulLowI8x16S: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI8x16, Signed: true, UseLow: true}, + ) + case 
wasm.OpcodeVecI16x8ExtMulHighI8x16S: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI8x16, Signed: true, UseLow: false}, + ) + case wasm.OpcodeVecI16x8ExtMulLowI8x16U: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI8x16, Signed: false, UseLow: true}, + ) + case wasm.OpcodeVecI16x8ExtMulHighI8x16U: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI8x16, Signed: false, UseLow: false}, + ) + case wasm.OpcodeVecI32x4ExtMulLowI16x8S: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI16x8, Signed: true, UseLow: true}, + ) + case wasm.OpcodeVecI32x4ExtMulHighI16x8S: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI16x8, Signed: true, UseLow: false}, + ) + case wasm.OpcodeVecI32x4ExtMulLowI16x8U: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI16x8, Signed: false, UseLow: true}, + ) + case wasm.OpcodeVecI32x4ExtMulHighI16x8U: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI16x8, Signed: false, UseLow: false}, + ) + case wasm.OpcodeVecI64x2ExtMulLowI32x4S: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI32x4, Signed: true, UseLow: true}, + ) + case wasm.OpcodeVecI64x2ExtMulHighI32x4S: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI32x4, Signed: true, UseLow: false}, + ) + case wasm.OpcodeVecI64x2ExtMulLowI32x4U: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI32x4, Signed: false, UseLow: true}, + ) + case wasm.OpcodeVecI64x2ExtMulHighI32x4U: + c.emit( + &OperationV128ExtMul{OriginShape: ShapeI32x4, Signed: false, UseLow: false}, + ) + case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S: + c.emit( + &OperationV128ExtAddPairwise{OriginShape: ShapeI8x16, Signed: true}, + ) + case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U: + c.emit( + &OperationV128ExtAddPairwise{OriginShape: ShapeI8x16, Signed: false}, + ) + case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S: + c.emit( + &OperationV128ExtAddPairwise{OriginShape: ShapeI16x8, Signed: true}, + ) + case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U: + c.emit( + &OperationV128ExtAddPairwise{OriginShape: ShapeI16x8, Signed: false}, + ) + case wasm.OpcodeVecF64x2PromoteLowF32x4Zero: + c.emit( + &OperationV128FloatPromote{}, + ) + case wasm.OpcodeVecF32x4DemoteF64x2Zero: + c.emit( + &OperationV128FloatDemote{}, + ) + case wasm.OpcodeVecF32x4ConvertI32x4S: + c.emit( + &OperationV128FConvertFromI{DestinationShape: ShapeF32x4, Signed: true}, + ) + case wasm.OpcodeVecF32x4ConvertI32x4U: + c.emit( + &OperationV128FConvertFromI{DestinationShape: ShapeF32x4, Signed: false}, + ) + case wasm.OpcodeVecF64x2ConvertLowI32x4S: + c.emit( + &OperationV128FConvertFromI{DestinationShape: ShapeF64x2, Signed: true}, + ) + case wasm.OpcodeVecF64x2ConvertLowI32x4U: + c.emit( + &OperationV128FConvertFromI{DestinationShape: ShapeF64x2, Signed: false}, + ) + case wasm.OpcodeVecI32x4DotI16x8S: + c.emit( + &OperationV128Dot{}, + ) + case wasm.OpcodeVecI8x16NarrowI16x8S: + c.emit( + &OperationV128Narrow{OriginShape: ShapeI16x8, Signed: true}, + ) + case wasm.OpcodeVecI8x16NarrowI16x8U: + c.emit( + &OperationV128Narrow{OriginShape: ShapeI16x8, Signed: false}, + ) + case wasm.OpcodeVecI16x8NarrowI32x4S: + c.emit( + &OperationV128Narrow{OriginShape: ShapeI32x4, Signed: true}, + ) + case wasm.OpcodeVecI16x8NarrowI32x4U: + c.emit( + &OperationV128Narrow{OriginShape: ShapeI32x4, Signed: false}, + ) + case wasm.OpcodeVecI32x4TruncSatF32x4S: + c.emit( + &OperationV128ITruncSatFromF{OriginShape: ShapeF32x4, Signed: true}, + ) + case wasm.OpcodeVecI32x4TruncSatF32x4U: + c.emit( + &OperationV128ITruncSatFromF{OriginShape: ShapeF32x4, Signed: false}, + ) + case 
wasm.OpcodeVecI32x4TruncSatF64x2SZero: + c.emit( + &OperationV128ITruncSatFromF{OriginShape: ShapeF64x2, Signed: true}, + ) + case wasm.OpcodeVecI32x4TruncSatF64x2UZero: + c.emit( + &OperationV128ITruncSatFromF{OriginShape: ShapeF64x2, Signed: false}, + ) default: return fmt.Errorf("unsupported vector instruction in wazeroir: %s", wasm.VectorInstructionName(vecOp)) } diff --git a/internal/wazeroir/compiler_test.go b/internal/wazeroir/compiler_test.go index 640835f9530..e81f559831c 100644 --- a/internal/wazeroir/compiler_test.go +++ b/internal/wazeroir/compiler_test.go @@ -1256,102 +1256,102 @@ func TestCompile_Vec(t *testing.T) { { name: wasm.OpcodeVecV128LoadName, body: load(wasm.OpcodeVecV128Load, 0, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type128, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType128, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, }, { name: wasm.OpcodeVecV128LoadName + "/align=4", body: load(wasm.OpcodeVecV128Load, 0, 4), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type128, Arg: &MemoryArg{Alignment: 4, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType128, Arg: &MemoryArg{Alignment: 4, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load8x8SName, body: load(wasm.OpcodeVecV128Load8x8s, 1, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type8x8s, Arg: &MemoryArg{Alignment: 0, Offset: 1}}, + expected: &OperationV128Load{Type: V128LoadType8x8s, Arg: &MemoryArg{Alignment: 0, Offset: 1}}, }, { name: wasm.OpcodeVecV128Load8x8SName + "/align=1", body: load(wasm.OpcodeVecV128Load8x8s, 0, 1), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type8x8s, Arg: &MemoryArg{Alignment: 1, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType8x8s, Arg: &MemoryArg{Alignment: 1, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load8x8UName, body: load(wasm.OpcodeVecV128Load8x8u, 0, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type8x8u, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType8x8u, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load8x8UName + "/align=1", body: load(wasm.OpcodeVecV128Load8x8u, 0, 1), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type8x8u, Arg: &MemoryArg{Alignment: 1, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType8x8u, Arg: &MemoryArg{Alignment: 1, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load16x4SName, body: load(wasm.OpcodeVecV128Load16x4s, 1, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type16x4s, Arg: &MemoryArg{Alignment: 0, Offset: 1}}, + expected: &OperationV128Load{Type: V128LoadType16x4s, Arg: &MemoryArg{Alignment: 0, Offset: 1}}, }, { name: wasm.OpcodeVecV128Load16x4SName + "/align=2", body: load(wasm.OpcodeVecV128Load16x4s, 0, 2), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type16x4s, Arg: &MemoryArg{Alignment: 2, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType16x4s, Arg: &MemoryArg{Alignment: 2, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load16x4UName, body: load(wasm.OpcodeVecV128Load16x4u, 0, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type16x4u, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType16x4u, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load16x4UName + "/align=2", 
body: load(wasm.OpcodeVecV128Load16x4u, 0, 2), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type16x4u, Arg: &MemoryArg{Alignment: 2, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType16x4u, Arg: &MemoryArg{Alignment: 2, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load32x2SName, body: load(wasm.OpcodeVecV128Load32x2s, 1, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type32x2s, Arg: &MemoryArg{Alignment: 0, Offset: 1}}, + expected: &OperationV128Load{Type: V128LoadType32x2s, Arg: &MemoryArg{Alignment: 0, Offset: 1}}, }, { name: wasm.OpcodeVecV128Load32x2SName + "/align=3", body: load(wasm.OpcodeVecV128Load32x2s, 0, 3), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type32x2s, Arg: &MemoryArg{Alignment: 3, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType32x2s, Arg: &MemoryArg{Alignment: 3, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load32x2UName, body: load(wasm.OpcodeVecV128Load32x2u, 0, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type32x2u, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType32x2u, Arg: &MemoryArg{Alignment: 0, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load32x2UName + "/align=3", body: load(wasm.OpcodeVecV128Load32x2u, 0, 3), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type32x2u, Arg: &MemoryArg{Alignment: 3, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType32x2u, Arg: &MemoryArg{Alignment: 3, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load8SplatName, body: load(wasm.OpcodeVecV128Load8Splat, 2, 0), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type8Splat, Arg: &MemoryArg{Alignment: 0, Offset: 2}}, + expected: &OperationV128Load{Type: V128LoadType8Splat, Arg: &MemoryArg{Alignment: 0, Offset: 2}}, }, { name: wasm.OpcodeVecV128Load16SplatName, body: load(wasm.OpcodeVecV128Load16Splat, 0, 1), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type16Splat, Arg: &MemoryArg{Alignment: 1, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType16Splat, Arg: &MemoryArg{Alignment: 1, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load32SplatName, body: load(wasm.OpcodeVecV128Load32Splat, 3, 2), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type32Splat, Arg: &MemoryArg{Alignment: 2, Offset: 3}}, + expected: &OperationV128Load{Type: V128LoadType32Splat, Arg: &MemoryArg{Alignment: 2, Offset: 3}}, }, { name: wasm.OpcodeVecV128Load64SplatName, body: load(wasm.OpcodeVecV128Load64Splat, 0, 3), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type64Splat, Arg: &MemoryArg{Alignment: 3, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType64Splat, Arg: &MemoryArg{Alignment: 3, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load32zeroName, body: load(wasm.OpcodeVecV128Load32zero, 0, 2), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type32zero, Arg: &MemoryArg{Alignment: 2, Offset: 0}}, + expected: &OperationV128Load{Type: V128LoadType32zero, Arg: &MemoryArg{Alignment: 2, Offset: 0}}, }, { name: wasm.OpcodeVecV128Load64zeroName, body: load(wasm.OpcodeVecV128Load64zero, 5, 3), needDropBeforeReturn: true, - expected: &OperationV128Load{Type: LoadV128Type64zero, Arg: &MemoryArg{Alignment: 3, Offset: 5}}, + expected: &OperationV128Load{Type: V128LoadType64zero, Arg: &MemoryArg{Alignment: 3, Offset: 5}}, }, {name: wasm.OpcodeVecV128Load8LaneName, 
needDropBeforeReturn: true, body: loadLane(wasm.OpcodeVecV128Load8Lane, 5, 0, 10), @@ -1899,6 +1899,535 @@ func TestCompile_Vec(t *testing.T) { needDropBeforeReturn: true, expected: &OperationV128AnyTrue{}, }, + {name: wasm.OpcodeVecI8x16AddName, body: vv2v(wasm.OpcodeVecI8x16Add), + needDropBeforeReturn: true, + expected: &OperationV128Add{Shape: ShapeI8x16}, + }, + {name: wasm.OpcodeVecI8x16AddSatSName, body: vv2v(wasm.OpcodeVecI8x16AddSatS), + needDropBeforeReturn: true, + expected: &OperationV128AddSat{Shape: ShapeI8x16, Signed: true}, + }, + {name: wasm.OpcodeVecI8x16AddSatUName, body: vv2v(wasm.OpcodeVecI8x16AddSatU), + needDropBeforeReturn: true, + expected: &OperationV128AddSat{Shape: ShapeI8x16, Signed: false}, + }, + {name: wasm.OpcodeVecI8x16SubName, body: vv2v(wasm.OpcodeVecI8x16Sub), + needDropBeforeReturn: true, + expected: &OperationV128Sub{Shape: ShapeI8x16}, + }, + {name: wasm.OpcodeVecI8x16SubSatSName, body: vv2v(wasm.OpcodeVecI8x16SubSatS), + needDropBeforeReturn: true, + expected: &OperationV128SubSat{Shape: ShapeI8x16, Signed: true}, + }, + {name: wasm.OpcodeVecI8x16SubSatUName, body: vv2v(wasm.OpcodeVecI8x16SubSatU), + needDropBeforeReturn: true, + expected: &OperationV128SubSat{Shape: ShapeI8x16, Signed: false}, + }, + {name: wasm.OpcodeVecI16x8AddName, body: vv2v(wasm.OpcodeVecI16x8Add), + needDropBeforeReturn: true, + expected: &OperationV128Add{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI16x8AddSatSName, body: vv2v(wasm.OpcodeVecI16x8AddSatS), + needDropBeforeReturn: true, + expected: &OperationV128AddSat{Shape: ShapeI16x8, Signed: true}, + }, + {name: wasm.OpcodeVecI16x8AddSatUName, body: vv2v(wasm.OpcodeVecI16x8AddSatU), + needDropBeforeReturn: true, + expected: &OperationV128AddSat{Shape: ShapeI16x8, Signed: false}, + }, + {name: wasm.OpcodeVecI16x8SubName, body: vv2v(wasm.OpcodeVecI16x8Sub), + needDropBeforeReturn: true, + expected: &OperationV128Sub{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI16x8SubSatSName, body: vv2v(wasm.OpcodeVecI16x8SubSatS), + needDropBeforeReturn: true, + expected: &OperationV128SubSat{Shape: ShapeI16x8, Signed: true}, + }, + {name: wasm.OpcodeVecI16x8SubSatUName, body: vv2v(wasm.OpcodeVecI16x8SubSatU), + needDropBeforeReturn: true, + expected: &OperationV128SubSat{Shape: ShapeI16x8, Signed: false}, + }, + {name: wasm.OpcodeVecI16x8MulName, body: vv2v(wasm.OpcodeVecI16x8Mul), + needDropBeforeReturn: true, + expected: &OperationV128Mul{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI32x4AddName, body: vv2v(wasm.OpcodeVecI32x4Add), + needDropBeforeReturn: true, + expected: &OperationV128Add{Shape: ShapeI32x4}, + }, + {name: wasm.OpcodeVecI32x4SubName, body: vv2v(wasm.OpcodeVecI32x4Sub), + needDropBeforeReturn: true, + expected: &OperationV128Sub{Shape: ShapeI32x4}, + }, + {name: wasm.OpcodeVecI32x4MulName, body: vv2v(wasm.OpcodeVecI32x4Mul), + needDropBeforeReturn: true, + expected: &OperationV128Mul{Shape: ShapeI32x4}, + }, + {name: wasm.OpcodeVecI64x2AddName, body: vv2v(wasm.OpcodeVecI64x2Add), + needDropBeforeReturn: true, + expected: &OperationV128Add{Shape: ShapeI64x2}, + }, + {name: wasm.OpcodeVecI64x2SubName, body: vv2v(wasm.OpcodeVecI64x2Sub), + needDropBeforeReturn: true, + expected: &OperationV128Sub{Shape: ShapeI64x2}, + }, + {name: wasm.OpcodeVecI64x2MulName, body: vv2v(wasm.OpcodeVecI64x2Mul), + needDropBeforeReturn: true, + expected: &OperationV128Mul{Shape: ShapeI64x2}, + }, + {name: wasm.OpcodeVecF32x4AddName, body: vv2v(wasm.OpcodeVecF32x4Add), + needDropBeforeReturn: true, + expected: 
&OperationV128Add{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4SubName, body: vv2v(wasm.OpcodeVecF32x4Sub), + needDropBeforeReturn: true, + expected: &OperationV128Sub{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4MulName, body: vv2v(wasm.OpcodeVecF32x4Mul), + needDropBeforeReturn: true, + expected: &OperationV128Mul{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4DivName, body: vv2v(wasm.OpcodeVecF32x4Div), + needDropBeforeReturn: true, + expected: &OperationV128Div{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF64x2AddName, body: vv2v(wasm.OpcodeVecF64x2Add), + needDropBeforeReturn: true, + expected: &OperationV128Add{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2SubName, body: vv2v(wasm.OpcodeVecF64x2Sub), + needDropBeforeReturn: true, + expected: &OperationV128Sub{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2MulName, body: vv2v(wasm.OpcodeVecF64x2Mul), + needDropBeforeReturn: true, + expected: &OperationV128Mul{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2DivName, body: vv2v(wasm.OpcodeVecF64x2Div), + needDropBeforeReturn: true, + expected: &OperationV128Div{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecI8x16MinSName, body: vv2v(wasm.OpcodeVecI8x16MinS), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeI8x16, Signed: true}, + }, + {name: wasm.OpcodeVecI8x16MinUName, body: vv2v(wasm.OpcodeVecI8x16MinU), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeI8x16}, + }, + {name: wasm.OpcodeVecI8x16MaxSName, body: vv2v(wasm.OpcodeVecI8x16MaxS), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeI8x16, Signed: true}, + }, + {name: wasm.OpcodeVecI8x16MaxUName, body: vv2v(wasm.OpcodeVecI8x16MaxU), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeI8x16}, + }, + {name: wasm.OpcodeVecI8x16AvgrUName, body: vv2v(wasm.OpcodeVecI8x16AvgrU), + needDropBeforeReturn: true, + expected: &OperationV128AvgrU{Shape: ShapeI8x16}, + }, + {name: wasm.OpcodeVecI16x8MinSName, body: vv2v(wasm.OpcodeVecI16x8MinS), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeI16x8, Signed: true}, + }, + {name: wasm.OpcodeVecI16x8MinUName, body: vv2v(wasm.OpcodeVecI16x8MinU), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI16x8MaxSName, body: vv2v(wasm.OpcodeVecI16x8MaxS), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeI16x8, Signed: true}, + }, + {name: wasm.OpcodeVecI16x8MaxUName, body: vv2v(wasm.OpcodeVecI16x8MaxU), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI16x8AvgrUName, body: vv2v(wasm.OpcodeVecI16x8AvgrU), + needDropBeforeReturn: true, + expected: &OperationV128AvgrU{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI32x4MinSName, body: vv2v(wasm.OpcodeVecI32x4MinS), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeI32x4, Signed: true}, + }, + {name: wasm.OpcodeVecI32x4MinUName, body: vv2v(wasm.OpcodeVecI32x4MinU), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeI32x4}, + }, + {name: wasm.OpcodeVecI32x4MaxSName, body: vv2v(wasm.OpcodeVecI32x4MaxS), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeI32x4, Signed: true}, + }, + {name: wasm.OpcodeVecI32x4MaxUName, body: vv2v(wasm.OpcodeVecI32x4MaxU), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeI32x4}, + }, + {name: wasm.OpcodeVecF32x4MinName, body: vv2v(wasm.OpcodeVecF32x4Min), 
+ needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4MaxName, body: vv2v(wasm.OpcodeVecF32x4Max), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF64x2MinName, body: vv2v(wasm.OpcodeVecF64x2Min), + needDropBeforeReturn: true, + expected: &OperationV128Min{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2MaxName, body: vv2v(wasm.OpcodeVecF64x2Max), + needDropBeforeReturn: true, + expected: &OperationV128Max{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecI8x16AbsName, body: v2v(wasm.OpcodeVecI8x16Abs), + needDropBeforeReturn: true, + expected: &OperationV128Abs{Shape: ShapeI8x16}, + }, + {name: wasm.OpcodeVecI8x16PopcntName, body: v2v(wasm.OpcodeVecI8x16Popcnt), + needDropBeforeReturn: true, + expected: &OperationV128Popcnt{}, + }, + {name: wasm.OpcodeVecI16x8AbsName, body: v2v(wasm.OpcodeVecI16x8Abs), + needDropBeforeReturn: true, + expected: &OperationV128Abs{Shape: ShapeI16x8}, + }, + {name: wasm.OpcodeVecI32x4AbsName, body: v2v(wasm.OpcodeVecI32x4Abs), + needDropBeforeReturn: true, + expected: &OperationV128Abs{Shape: ShapeI32x4}, + }, + {name: wasm.OpcodeVecI64x2AbsName, body: v2v(wasm.OpcodeVecI64x2Abs), + needDropBeforeReturn: true, + expected: &OperationV128Abs{Shape: ShapeI64x2}, + }, + {name: wasm.OpcodeVecF32x4AbsName, body: v2v(wasm.OpcodeVecF32x4Abs), + needDropBeforeReturn: true, + expected: &OperationV128Abs{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF64x2AbsName, body: v2v(wasm.OpcodeVecF64x2Abs), + needDropBeforeReturn: true, + expected: &OperationV128Abs{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF32x4CeilName, body: v2v(wasm.OpcodeVecF32x4Ceil), + needDropBeforeReturn: true, + expected: &OperationV128Ceil{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4FloorName, body: v2v(wasm.OpcodeVecF32x4Floor), + needDropBeforeReturn: true, + expected: &OperationV128Floor{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4TruncName, body: v2v(wasm.OpcodeVecF32x4Trunc), + needDropBeforeReturn: true, + expected: &OperationV128Trunc{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4NearestName, body: v2v(wasm.OpcodeVecF32x4Nearest), + needDropBeforeReturn: true, + expected: &OperationV128Nearest{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF64x2CeilName, body: v2v(wasm.OpcodeVecF64x2Ceil), + needDropBeforeReturn: true, + expected: &OperationV128Ceil{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2FloorName, body: v2v(wasm.OpcodeVecF64x2Floor), + needDropBeforeReturn: true, + expected: &OperationV128Floor{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2TruncName, body: v2v(wasm.OpcodeVecF64x2Trunc), + needDropBeforeReturn: true, + expected: &OperationV128Trunc{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2NearestName, body: v2v(wasm.OpcodeVecF64x2Nearest), + needDropBeforeReturn: true, + expected: &OperationV128Nearest{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF32x4PminName, body: vv2v(wasm.OpcodeVecF32x4Pmin), + needDropBeforeReturn: true, + expected: &OperationV128Pmin{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF32x4PmaxName, body: vv2v(wasm.OpcodeVecF32x4Pmax), + needDropBeforeReturn: true, + expected: &OperationV128Pmax{Shape: ShapeF32x4}, + }, + {name: wasm.OpcodeVecF64x2PminName, body: vv2v(wasm.OpcodeVecF64x2Pmin), + needDropBeforeReturn: true, + expected: &OperationV128Pmin{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecF64x2PmaxName, body: vv2v(wasm.OpcodeVecF64x2Pmax), + needDropBeforeReturn: true, + 
expected: &OperationV128Pmax{Shape: ShapeF64x2}, + }, + {name: wasm.OpcodeVecI16x8Q15mulrSatSName, body: vv2v(wasm.OpcodeVecI16x8Q15mulrSatS), + needDropBeforeReturn: true, + expected: &OperationV128Q15mulrSatS{}, + }, + {name: wasm.OpcodeVecI16x8ExtMulLowI8x16SName, body: vv2v(wasm.OpcodeVecI16x8ExtMulLowI8x16S), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI8x16, + Signed: true, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI16x8ExtMulHighI8x16SName, body: vv2v(wasm.OpcodeVecI16x8ExtMulHighI8x16S), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI8x16, + Signed: true, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI16x8ExtMulLowI8x16UName, body: vv2v(wasm.OpcodeVecI16x8ExtMulLowI8x16U), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI8x16, + Signed: false, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI16x8ExtMulHighI8x16UName, body: vv2v(wasm.OpcodeVecI16x8ExtMulHighI8x16U), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI8x16, + Signed: false, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI32x4ExtMulLowI16x8SName, body: vv2v(wasm.OpcodeVecI32x4ExtMulLowI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI16x8, + Signed: true, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI32x4ExtMulHighI16x8SName, body: vv2v(wasm.OpcodeVecI32x4ExtMulHighI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI16x8, + Signed: true, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI32x4ExtMulLowI16x8UName, body: vv2v(wasm.OpcodeVecI32x4ExtMulLowI16x8U), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI16x8, + Signed: false, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI32x4ExtMulHighI16x8UName, body: vv2v(wasm.OpcodeVecI32x4ExtMulHighI16x8U), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI16x8, + Signed: false, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI64x2ExtMulLowI32x4SName, body: vv2v(wasm.OpcodeVecI64x2ExtMulLowI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI32x4, + Signed: true, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI64x2ExtMulHighI32x4SName, body: vv2v(wasm.OpcodeVecI64x2ExtMulHighI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI32x4, + Signed: true, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI64x2ExtMulLowI32x4UName, body: vv2v(wasm.OpcodeVecI64x2ExtMulLowI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI32x4, + Signed: false, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI64x2ExtMulHighI32x4UName, body: vv2v(wasm.OpcodeVecI64x2ExtMulHighI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128ExtMul{ + OriginShape: ShapeI32x4, + Signed: false, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI16x8ExtendLowI8x16SName, body: v2v(wasm.OpcodeVecI16x8ExtendLowI8x16S), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI8x16, + Signed: true, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI16x8ExtendHighI8x16SName, body: v2v(wasm.OpcodeVecI16x8ExtendHighI8x16S), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI8x16, + Signed: true, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI16x8ExtendLowI8x16UName, body: 
v2v(wasm.OpcodeVecI16x8ExtendLowI8x16U), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI8x16, + Signed: false, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI16x8ExtendHighI8x16UName, body: v2v(wasm.OpcodeVecI16x8ExtendHighI8x16U), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI8x16, + Signed: false, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI32x4ExtendLowI16x8SName, body: v2v(wasm.OpcodeVecI32x4ExtendLowI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI16x8, + Signed: true, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI32x4ExtendHighI16x8SName, body: v2v(wasm.OpcodeVecI32x4ExtendHighI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI16x8, + Signed: true, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI32x4ExtendLowI16x8UName, body: v2v(wasm.OpcodeVecI32x4ExtendLowI16x8U), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI16x8, + Signed: false, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI32x4ExtendHighI16x8UName, body: v2v(wasm.OpcodeVecI32x4ExtendHighI16x8U), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI16x8, + Signed: false, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI64x2ExtendLowI32x4SName, body: v2v(wasm.OpcodeVecI64x2ExtendLowI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI32x4, + Signed: true, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI64x2ExtendHighI32x4SName, body: v2v(wasm.OpcodeVecI64x2ExtendHighI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI32x4, + Signed: true, + UseLow: false, + }, + }, + {name: wasm.OpcodeVecI64x2ExtendLowI32x4UName, body: v2v(wasm.OpcodeVecI64x2ExtendLowI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI32x4, + Signed: false, + UseLow: true, + }, + }, + {name: wasm.OpcodeVecI64x2ExtendHighI32x4UName, body: v2v(wasm.OpcodeVecI64x2ExtendHighI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128Extend{ + OriginShape: ShapeI32x4, + Signed: false, + UseLow: false, + }, + }, + + {name: wasm.OpcodeVecI16x8ExtaddPairwiseI8x16SName, body: v2v(wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S), + needDropBeforeReturn: true, + expected: &OperationV128ExtAddPairwise{OriginShape: ShapeI8x16, Signed: true}, + }, + {name: wasm.OpcodeVecI16x8ExtaddPairwiseI8x16UName, body: v2v(wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U), + needDropBeforeReturn: true, + expected: &OperationV128ExtAddPairwise{OriginShape: ShapeI8x16, Signed: false}, + }, + {name: wasm.OpcodeVecI32x4ExtaddPairwiseI16x8SName, body: v2v(wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128ExtAddPairwise{OriginShape: ShapeI16x8, Signed: true}, + }, + {name: wasm.OpcodeVecI32x4ExtaddPairwiseI16x8UName, body: v2v(wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U), + needDropBeforeReturn: true, + expected: &OperationV128ExtAddPairwise{OriginShape: ShapeI16x8, Signed: false}, + }, + {name: wasm.OpcodeVecF64x2PromoteLowF32x4ZeroName, body: v2v(wasm.OpcodeVecF64x2PromoteLowF32x4Zero), + needDropBeforeReturn: true, + expected: &OperationV128FloatPromote{}, + }, + {name: wasm.OpcodeVecF32x4DemoteF64x2ZeroName, body: v2v(wasm.OpcodeVecF32x4DemoteF64x2Zero), + needDropBeforeReturn: true, + expected: &OperationV128FloatDemote{}, + }, + {name: wasm.OpcodeVecF32x4ConvertI32x4SName, body: 
v2v(wasm.OpcodeVecF32x4ConvertI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128FConvertFromI{DestinationShape: ShapeF32x4, Signed: true}, + }, + {name: wasm.OpcodeVecF32x4ConvertI32x4UName, body: v2v(wasm.OpcodeVecF32x4ConvertI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128FConvertFromI{DestinationShape: ShapeF32x4, Signed: false}, + }, + {name: wasm.OpcodeVecF64x2ConvertLowI32x4SName, body: v2v(wasm.OpcodeVecF64x2ConvertLowI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128FConvertFromI{DestinationShape: ShapeF64x2, Signed: true}, + }, + {name: wasm.OpcodeVecF64x2ConvertLowI32x4UName, body: v2v(wasm.OpcodeVecF64x2ConvertLowI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128FConvertFromI{DestinationShape: ShapeF64x2, Signed: false}, + }, + {name: wasm.OpcodeVecI32x4DotI16x8SName, body: vv2v(wasm.OpcodeVecI32x4DotI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128Dot{}, + }, + {name: wasm.OpcodeVecI8x16NarrowI16x8SName, body: vv2v(wasm.OpcodeVecI8x16NarrowI16x8S), + needDropBeforeReturn: true, + expected: &OperationV128Narrow{OriginShape: ShapeI16x8, Signed: true}, + }, + {name: wasm.OpcodeVecI8x16NarrowI16x8UName, body: vv2v(wasm.OpcodeVecI8x16NarrowI16x8U), + needDropBeforeReturn: true, + expected: &OperationV128Narrow{OriginShape: ShapeI16x8, Signed: false}, + }, + {name: wasm.OpcodeVecI16x8NarrowI32x4SName, body: vv2v(wasm.OpcodeVecI16x8NarrowI32x4S), + needDropBeforeReturn: true, + expected: &OperationV128Narrow{OriginShape: ShapeI32x4, Signed: true}, + }, + {name: wasm.OpcodeVecI16x8NarrowI32x4UName, body: vv2v(wasm.OpcodeVecI16x8NarrowI32x4U), + needDropBeforeReturn: true, + expected: &OperationV128Narrow{OriginShape: ShapeI32x4, Signed: false}, + }, + {name: wasm.OpcodeVecI32x4TruncSatF32x4SName, body: v2v(wasm.OpcodeVecI32x4TruncSatF32x4S), + needDropBeforeReturn: true, + expected: &OperationV128ITruncSatFromF{OriginShape: ShapeF32x4, Signed: true}, + }, + {name: wasm.OpcodeVecI32x4TruncSatF32x4UName, body: v2v(wasm.OpcodeVecI32x4TruncSatF32x4U), + needDropBeforeReturn: true, + expected: &OperationV128ITruncSatFromF{OriginShape: ShapeF32x4, Signed: false}, + }, + {name: wasm.OpcodeVecI32x4TruncSatF64x2SZeroName, body: v2v(wasm.OpcodeVecI32x4TruncSatF64x2SZero), + needDropBeforeReturn: true, + expected: &OperationV128ITruncSatFromF{OriginShape: ShapeF64x2, Signed: true}, + }, + {name: wasm.OpcodeVecI32x4TruncSatF64x2UZeroName, body: v2v(wasm.OpcodeVecI32x4TruncSatF64x2UZero), + needDropBeforeReturn: true, + expected: &OperationV128ITruncSatFromF{OriginShape: ShapeF64x2, Signed: false}, + }, } for _, tt := range tests { diff --git a/internal/wazeroir/format.go b/internal/wazeroir/format.go index c665d032c5b..39772b31a49 100644 --- a/internal/wazeroir/format.go +++ b/internal/wazeroir/format.go @@ -72,9 +72,9 @@ func formatOperation(w io.StringWriter, b Operation) { case *OperationStore: str = fmt.Sprintf("%s.store (align=%d, offset=%d)", o.Type, o.Arg.Alignment, o.Arg.Offset) case *OperationStore8: - str = fmt.Sprintf("%s.store8 (align=%d, offset=%d)", o.Type, o.Arg.Alignment, o.Arg.Offset) + str = fmt.Sprintf("store8 (align=%d, offset=%d)", o.Arg.Alignment, o.Arg.Offset) case *OperationStore16: - str = fmt.Sprintf("%s.store16 (align=%d, offset=%d)", o.Type, o.Arg.Alignment, o.Arg.Offset) + str = fmt.Sprintf("store16 (align=%d, offset=%d)", o.Arg.Alignment, o.Arg.Offset) case *OperationStore32: str = fmt.Sprintf("i64.store32 (align=%d, offset=%d)", o.Arg.Alignment, o.Arg.Offset) case *OperationMemorySize: diff 
--git a/internal/wazeroir/operations.go b/internal/wazeroir/operations.go index db25340bae6..c7603c224a4 100644 --- a/internal/wazeroir/operations.go +++ b/internal/wazeroir/operations.go @@ -2,6 +2,7 @@ package wazeroir import "fmt" +// UnsignedInt represents unsigned 32-bit or 64-bit integers. type UnsignedInt byte const ( @@ -9,6 +10,7 @@ const ( UnsignedInt64 ) +// String implements fmt.Stringer. func (s UnsignedInt) String() (ret string) { switch s { case UnsignedInt32: @@ -19,6 +21,7 @@ func (s UnsignedInt) String() (ret string) { return } +// SignedInt represents signed or unsigned integers. type SignedInt byte const ( @@ -28,6 +31,7 @@ const ( SignedUint64 ) +// String implements fmt.Stringer. func (s SignedInt) String() (ret string) { switch s { case SignedUint32: @@ -42,6 +46,7 @@ func (s SignedInt) String() (ret string) { return } +// Float represents scalar double- or single-precision floating-point values. type Float byte const ( @@ -49,6 +54,7 @@ const ( Float64 ) +// String implements fmt.Stringer. func (s Float) String() (ret string) { switch s { case Float32: @@ -59,6 +65,7 @@ func (s Float) String() (ret string) { return } +// UnsignedType is the union of UnsignedInt, Float, and the V128 vector type. type UnsignedType byte const ( @@ -70,6 +77,7 @@ const ( UnsignedTypeUnknown ) +// String implements fmt.Stringer. func (s UnsignedType) String() (ret string) { switch s { case UnsignedTypeI32: @@ -88,6 +96,7 @@ func (s UnsignedType) String() (ret string) { return } +// SignedType is the union of SignedInt and Float types. type SignedType byte const ( @@ -99,6 +108,7 @@ const ( SignedTypeFloat64 ) +// String implements fmt.Stringer. func (s SignedType) String() (ret string) { switch s { case SignedTypeInt32: @@ -117,12 +127,17 @@ func (s SignedType) String() (ret string) { return } +// Operation is the interface implemented by each individual operation. type Operation interface { + // Kind returns the kind of the implementation. Kind() OperationKind + // TODO String() } +// OperationKind is the kind of each implementation of the Operation interface. type OperationKind uint16 +// String implements fmt.Stringer.
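+// It panics if the OperationKind is unknown, per the default case below.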
func (o OperationKind) String() (ret string) { switch o { case OperationKindUnreachable: @@ -205,7 +220,6 @@ func (o OperationKind) String() (ret string) { ret = "Ctz" case OperationKindPopcnt: ret = "Popcnt" - case OperationKindDiv: ret = "Div" case OperationKindRem: @@ -348,6 +362,60 @@ func (o OperationKind) String() (ret string) { ret = "SignExtend64From16" case OperationKindSignExtend64From32: ret = "SignExtend64From32" + case OperationKindV128AddSat: + ret = "V128AddSat" + case OperationKindV128SubSat: + ret = "V128SubSat" + case OperationKindV128Mul: + ret = "V128Mul" + case OperationKindV128Div: + ret = "V128Div" + case OperationKindV128Neg: + ret = "V128Neg" + case OperationKindV128Sqrt: + ret = "V128Sqrt" + case OperationKindV128Abs: + ret = "V128Abs" + case OperationKindV128Popcnt: + ret = "V128Popcnt" + case OperationKindV128Min: + ret = "V128Min" + case OperationKindV128Max: + ret = "V128Max" + case OperationKindV128AvgrU: + ret = "V128AvgrU" + case OperationKindV128Ceil: + ret = "V128Ceil" + case OperationKindV128Floor: + ret = "V128Floor" + case OperationKindV128Trunc: + ret = "V128Trunc" + case OperationKindV128Nearest: + ret = "V128Nearest" + case OperationKindV128Pmin: + ret = "V128Pmin" + case OperationKindV128Pmax: + ret = "V128Pmax" + case OperationKindV128Extend: + ret = "V128Extend" + case OperationKindV128ExtMul: + ret = "V128ExtMul" + case OperationKindV128Q15mulrSatS: + ret = "V128Q15mulrSatS" + case OperationKindV128ExtAddPairwise: + ret = "V128ExtAddPairwise" + case OperationKindV128FloatPromote: + ret = "V128FloatPromote" + case OperationKindV128FloatDemote: + ret = "V128FloatDemote" + case OperationKindV128FConvertFromI: + ret = "V128FConvertFromI" + case OperationKindV128Dot: + ret = "V128Dot" + case OperationKindV128Narrow: + ret = "V128Narrow" + case OperationKindV128ITruncSatFromF: + ret = "V128ITruncSatFromF" default: panic(fmt.Errorf("unknown operation %d", o)) } @@ -355,130 +423,298 @@ func (o OperationKind) String() (ret string) { } const ( + // OperationKindUnreachable is the kind for OperationUnreachable. OperationKindUnreachable OperationKind = iota + // OperationKindLabel is the kind for OperationLabel. OperationKindLabel + // OperationKindBr is the kind for OperationBr. OperationKindBr + // OperationKindBrIf is the kind for OperationBrIf. OperationKindBrIf + // OperationKindBrTable is the kind for OperationBrTable. OperationKindBrTable + // OperationKindCall is the kind for OperationCall. OperationKindCall + // OperationKindCallIndirect is the kind for OperationCallIndirect. OperationKindCallIndirect + // OperationKindDrop is the kind for OperationDrop. OperationKindDrop + // OperationKindSelect is the kind for OperationSelect. OperationKindSelect + // OperationKindPick is the kind for OperationPick. OperationKindPick + // OperationKindSwap is the kind for OperationSwap. OperationKindSwap + // OperationKindGlobalGet is the kind for OperationGlobalGet. OperationKindGlobalGet + // OperationKindGlobalSet is the kind for OperationGlobalSet. OperationKindGlobalSet + // OperationKindLoad is the kind for OperationLoad. OperationKindLoad + // OperationKindLoad8 is the kind for OperationLoad8. OperationKindLoad8 + // OperationKindLoad16 is the kind for OperationLoad16. OperationKindLoad16 + // OperationKindLoad32 is the kind for OperationLoad32. OperationKindLoad32 + // OperationKindStore is the kind for OperationStore. OperationKindStore + // OperationKindStore8 is the kind for OperationStore8. 
OperationKindStore8 + // OperationKindStore16 is the kind for OperationStore16. OperationKindStore16 + // OperationKindStore32 is the kind for OperationStore32. OperationKindStore32 + // OperationKindMemorySize is the kind for OperationMemorySize. OperationKindMemorySize + // OperationKindMemoryGrow is the kind for OperationMemoryGrow. OperationKindMemoryGrow + // OperationKindConstI32 is the kind for OperationConstI32. OperationKindConstI32 + // OperationKindConstI64 is the kind for OperationConstI64. OperationKindConstI64 + // OperationKindConstF32 is the kind for OperationConstF32. OperationKindConstF32 + // OperationKindConstF64 is the kind for OperationConstF64. OperationKindConstF64 + // OperationKindEq is the kind for OperationEq. OperationKindEq + // OperationKindNe is the kind for OperationNe. OperationKindNe + // OperationKindEqz is the kind for OperationEqz. OperationKindEqz + // OperationKindLt is the kind for OperationLt. OperationKindLt + // OperationKindGt is the kind for OperationGt. OperationKindGt + // OperationKindLe is the kind for OperationLe. OperationKindLe + // OperationKindGe is the kind for OperationGe. OperationKindGe + // OperationKindAdd is the kind for OperationAdd. OperationKindAdd + // OperationKindSub is the kind for OperationSub. OperationKindSub + // OperationKindMul is the kind for OperationMul. OperationKindMul + // OperationKindClz is the kind for OperationClz. OperationKindClz + // OperationKindCtz is the kind for OperationCtz. OperationKindCtz + // OperationKindPopcnt is the kind for OperationPopcnt. OperationKindPopcnt + // OperationKindDiv is the kind for OperationDiv. OperationKindDiv + // OperationKindRem is the kind for OperationRem. OperationKindRem + // OperationKindAnd is the kind for OperationAnd. OperationKindAnd + // OperationKindOr is the kind for OperationOr. OperationKindOr + // OperationKindXor is the kind for OperationXor. OperationKindXor + // OperationKindShl is the kind for OperationShl. OperationKindShl + // OperationKindShr is the kind for OperationShr. OperationKindShr + // OperationKindRotl is the kind for OperationRotl. OperationKindRotl + // OperationKindRotr is the kind for OperationRotr. OperationKindRotr + // OperationKindAbs is the kind for OperationAbs. OperationKindAbs + // OperationKindNeg is the kind for OperationNeg. OperationKindNeg + // OperationKindCeil is the kind for OperationCeil. OperationKindCeil + // OperationKindFloor is the kind for OperationFloor. OperationKindFloor + // OperationKindTrunc is the kind for OperationTrunc. OperationKindTrunc + // OperationKindNearest is the kind for OperationNearest. OperationKindNearest + // OperationKindSqrt is the kind for OperationSqrt. OperationKindSqrt + // OperationKindMin is the kind for OperationMin. OperationKindMin + // OperationKindMax is the kind for OperationMax. OperationKindMax + // OperationKindCopysign is the kind for OperationCopysign. OperationKindCopysign + // OperationKindI32WrapFromI64 is the kind for OperationI32WrapFromI64. OperationKindI32WrapFromI64 + // OperationKindITruncFromF is the kind for OperationITruncFromF. OperationKindITruncFromF + // OperationKindFConvertFromI is the kind for OperationFConvertFromI. OperationKindFConvertFromI + // OperationKindF32DemoteFromF64 is the kind for OperationF32DemoteFromF64. OperationKindF32DemoteFromF64 + // OperationKindF64PromoteFromF32 is the kind for OperationF64PromoteFromF32. OperationKindF64PromoteFromF32 + // OperationKindI32ReinterpretFromF32 is the kind for OperationI32ReinterpretFromF32. 
OperationKindI32ReinterpretFromF32 + // OperationKindI64ReinterpretFromF64 is the kind for OperationI64ReinterpretFromF64. OperationKindI64ReinterpretFromF64 + // OperationKindF32ReinterpretFromI32 is the kind for OperationF32ReinterpretFromI32. OperationKindF32ReinterpretFromI32 + // OperationKindF64ReinterpretFromI64 is the kind for OperationF64ReinterpretFromI64. OperationKindF64ReinterpretFromI64 + // OperationKindExtend is the kind for OperationExtend. OperationKindExtend + // OperationKindSignExtend32From8 is the kind for OperationSignExtend32From8. OperationKindSignExtend32From8 + // OperationKindSignExtend32From16 is the kind for OperationSignExtend32From16. OperationKindSignExtend32From16 + // OperationKindSignExtend64From8 is the kind for OperationSignExtend64From8. OperationKindSignExtend64From8 + // OperationKindSignExtend64From16 is the kind for OperationSignExtend64From16. OperationKindSignExtend64From16 + // OperationKindSignExtend64From32 is the kind for OperationSignExtend64From32. OperationKindSignExtend64From32 + // OperationKindMemoryInit is the kind for OperationMemoryInit. OperationKindMemoryInit + // OperationKindDataDrop is the kind for OperationDataDrop. OperationKindDataDrop + // OperationKindMemoryCopy is the kind for OperationMemoryCopy. OperationKindMemoryCopy + // OperationKindMemoryFill is the kind for OperationMemoryFill. OperationKindMemoryFill + // OperationKindTableInit is the kind for OperationTableInit. OperationKindTableInit + // OperationKindElemDrop is the kind for OperationElemDrop. OperationKindElemDrop + // OperationKindTableCopy is the kind for OperationTableCopy. OperationKindTableCopy + // OperationKindRefFunc is the kind for OperationRefFunc. OperationKindRefFunc + // OperationKindTableGet is the kind for OperationTableGet. OperationKindTableGet + // OperationKindTableSet is the kind for OperationTableSet. OperationKindTableSet + // OperationKindTableSize is the kind for OperationTableSize. OperationKindTableSize + // OperationKindTableGrow is the kind for OperationTableGrow. OperationKindTableGrow + // OperationKindTableFill is the kind for OperationTableFill. OperationKindTableFill // Vector value related instructions are prefixed by V128. + // OperationKindV128Const is the kind for OperationV128Const. OperationKindV128Const + // OperationKindV128Add is the kind for OperationV128Add. OperationKindV128Add + // OperationKindV128Sub is the kind for OperationV128Sub. OperationKindV128Sub + // OperationKindV128Load is the kind for OperationV128Load. OperationKindV128Load + // OperationKindV128LoadLane is the kind for OperationV128LoadLane. OperationKindV128LoadLane + // OperationKindV128Store is the kind for OperationV128Store. OperationKindV128Store + // OperationKindV128StoreLane is the kind for OperationV128StoreLane. OperationKindV128StoreLane + // OperationKindV128ExtractLane is the kind for OperationV128ExtractLane. OperationKindV128ExtractLane + // OperationKindV128ReplaceLane is the kind for OperationV128ReplaceLane. OperationKindV128ReplaceLane + // OperationKindV128Splat is the kind for OperationV128Splat. OperationKindV128Splat + // OperationKindV128Shuffle is the kind for OperationV128Shuffle. OperationKindV128Shuffle + // OperationKindV128Swizzle is the kind for OperationV128Swizzle. OperationKindV128Swizzle + // OperationKindV128AnyTrue is the kind for OperationV128AnyTrue. OperationKindV128AnyTrue + // OperationKindV128AllTrue is the kind for OperationV128AllTrue. 
OperationKindV128AllTrue + // OperationKindV128BitMask is the kind for OperationV128BitMask. OperationKindV128BitMask + // OperationKindV128And is the kind for OperationV128And. OperationKindV128And + // OperationKindV128Not is the kind for OperationV128Not. OperationKindV128Not + // OperationKindV128Or is the kind for OperationV128Or. OperationKindV128Or + // OperationKindV128Xor is the kind for OperationV128Xor. OperationKindV128Xor + // OperationKindV128Bitselect is the kind for OperationV128Bitselect. OperationKindV128Bitselect + // OperationKindV128AndNot is the kind for OperationV128AndNot. OperationKindV128AndNot + // OperationKindV128Shl is the kind for OperationV128Shl. OperationKindV128Shl + // OperationKindV128Shr is the kind for OperationV128Shr. OperationKindV128Shr + // OperationKindV128Cmp is the kind for OperationV128Cmp. OperationKindV128Cmp + // OperationKindV128AddSat is the kind for OperationV128AddSat. + OperationKindV128AddSat + // OperationKindV128SubSat is the kind for OperationV128SubSat. + OperationKindV128SubSat + // OperationKindV128Mul is the kind for OperationV128Mul. + OperationKindV128Mul + // OperationKindV128Div is the kind for OperationV128Div. + OperationKindV128Div + // OperationKindV128Neg is the kind for OperationV128Neg. + OperationKindV128Neg + // OperationKindV128Sqrt is the kind for OperationV128Sqrt. + OperationKindV128Sqrt + // OperationKindV128Abs is the kind for OperationV128Abs. + OperationKindV128Abs + // OperationKindV128Popcnt is the kind for OperationV128Popcnt. + OperationKindV128Popcnt + // OperationKindV128Min is the kind for OperationV128Min. + OperationKindV128Min + // OperationKindV128Max is the kind for OperationV128Max. + OperationKindV128Max + // OperationKindV128AvgrU is the kind for OperationV128AvgrU. + OperationKindV128AvgrU + // OperationKindV128Pmin is the kind for OperationV128Pmin. + OperationKindV128Pmin + // OperationKindV128Pmax is the kind for OperationV128Pmax. + OperationKindV128Pmax + // OperationKindV128Ceil is the kind for OperationV128Ceil. + OperationKindV128Ceil + // OperationKindV128Floor is the kind for OperationV128Floor. + OperationKindV128Floor + // OperationKindV128Trunc is the kind for OperationV128Trunc. + OperationKindV128Trunc + // OperationKindV128Nearest is the kind for OperationV128Nearest. + OperationKindV128Nearest + // OperationKindV128Extend is the kind for OperationV128Extend. + OperationKindV128Extend + // OperationKindV128ExtMul is the kind for OperationV128ExtMul. + OperationKindV128ExtMul + // OperationKindV128Q15mulrSatS is the kind for OperationV128Q15mulrSatS. + OperationKindV128Q15mulrSatS + // OperationKindV128ExtAddPairwise is the kind for OperationV128ExtAddPairwise. + OperationKindV128ExtAddPairwise + // OperationKindV128FloatPromote is the kind for OperationV128FloatPromote. + OperationKindV128FloatPromote + // OperationKindV128FloatDemote is the kind for OperationV128FloatDemote. + OperationKindV128FloatDemote + // OperationKindV128FConvertFromI is the kind for OperationV128FConvertFromI. + OperationKindV128FConvertFromI + // OperationKindV128Dot is the kind for OperationV128Dot. + OperationKindV128Dot + // OperationKindV128Narrow is the kind for OperationV128Narrow. + OperationKindV128Narrow + // OperationKindV128ITruncSatFromF is the kind for OperationV128ITruncSatFromF. + OperationKindV128ITruncSatFromF // operationKindEnd is always placed at the bottom of this iota definition to be used in the test. 
operationKindEnd ) +// Label is the label of each block in wazeroir, where a "block" consists of multiple operations +// and must end with a branching operation (e.g. OperationBr or OperationBrIf). type Label struct { FrameID uint32 Kind LabelKind } +// String implements fmt.Stringer. func (l *Label) String() (ret string) { if l == nil { // Sometimes String() is called on the nil label which is interpreted @@ -496,11 +732,23 @@ func (l *Label) String() (ret string) { return } +// LabelKind is the kind of the label. type LabelKind = byte const ( + // LabelKindHeader is the header for various blocks. For example, the "then" block of + // wasm.OpcodeIfName in Wasm has the label of this kind. LabelKindHeader LabelKind = iota + // LabelKindElse is the kind of label for the "else" block of wasm.OpcodeIfName in Wasm. LabelKindElse + // LabelKindContinuation is the kind of label which is the continuation of blocks. + // For example, for wasm text like + // (func + // .... + // (if (local.get 0) (then (nop)) (else (nop))) + // return + // ) + // we have the continuation block (of the if-block) corresponding to the "return" opcode. LabelKindContinuation ) @@ -512,14 +760,19 @@ func (l *Label) asBranchTargetDrop() *BranchTargetDrop { return &BranchTargetDrop{Target: l.asBranchTarget()} } +// BranchTarget represents the branch operation's target such as OperationBr or OperationBrIf. type BranchTarget struct { + // Label holds the target label. Note that this is nullable and in that case + // the branch target is the "return" of the function. Label *Label } +// IsReturnTarget returns true if the branch target is the function return, false otherwise. func (b *BranchTarget) IsReturnTarget() bool { return b.Label == nil } +// String implements fmt.Stringer. func (b *BranchTarget) String() (ret string) { if b.IsReturnTarget() { ret = ".return" @@ -529,11 +782,14 @@ func (b *BranchTarget) String() (ret string) { return } +// BranchTargetDrop represents the branch target and the drop range which must be dropped +// before giving control over to the target label. type BranchTargetDrop struct { Target *BranchTarget ToDrop *InclusiveRange } +// String implements fmt.Stringer. func (b *BranchTargetDrop) String() (ret string) { if b.ToDrop != nil { ret = fmt.Sprintf("%s(drop %d..%d)", b.Target, b.ToDrop.Start, b.ToDrop.End) @@ -543,80 +799,149 @@ func (b *BranchTargetDrop) String() (ret string) { return } +// OperationUnreachable implements Operation. +// +// This corresponds to wasm.OpcodeUnreachable. +// +// The engines are expected to exit the execution with the wasmruntime.ErrRuntimeUnreachable error. type OperationUnreachable struct{} -func (o *OperationUnreachable) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationUnreachable) Kind() OperationKind { return OperationKindUnreachable } +// OperationLabel implements Operation. +// +// This is used to inform the engines of the beginning of a label. type OperationLabel struct { Label *Label } -func (o *OperationLabel) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationLabel) Kind() OperationKind { return OperationKindLabel } +// OperationBr implements Operation. +// +// The engines are expected to branch into the OperationBr.Target label. type OperationBr struct { Target *BranchTarget } -func (o *OperationBr) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationBr) Kind() OperationKind { return OperationKindBr } +// OperationBrIf implements Operation.
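+// +// This corresponds to wasm.OpcodeBrIfName.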
+// +// The engines are expected to pop a value and branch into the OperationBrIf.Then label if the value is non-zero. +// Otherwise (i.e. the popped value equals zero), the code branches into the OperationBrIf.Else label. type OperationBrIf struct { Then, Else *BranchTargetDrop } -func (o *OperationBrIf) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationBrIf) Kind() OperationKind { return OperationKindBrIf } -type InclusiveRange struct { - Start, End int -} - +// OperationBrTable implements Operation. +// +// This corresponds to wasm.OpcodeBrTableName except that the labels +// here are wazeroir-level labels, not the Wasm ones. +// +// The engines are expected to do the br_table operation based on the +// OperationBrTable.Default and OperationBrTable.Targets. More precisely, +// this pops a value from the stack (called "index") and decides which branch to take next +// based on that value. +// +// For example, assume we have operations like {default: L_DEFAULT, targets: [L0, L1, L2]}. +// If "index" >= len(targets), then branch into the L_DEFAULT label. +// Otherwise, we enter the label of targets[index]. type OperationBrTable struct { Targets []*BranchTargetDrop Default *BranchTargetDrop } -func (o *OperationBrTable) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationBrTable) Kind() OperationKind { return OperationKindBrTable } +// OperationCall implements Operation. +// +// This corresponds to wasm.OpcodeCallName, and engines are expected to +// enter into a function whose index equals OperationCall.FunctionIndex. type OperationCall struct { FunctionIndex uint32 } -func (o *OperationCall) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationCall) Kind() OperationKind { return OperationKindCall } +// OperationCallIndirect implements Operation. +// +// This corresponds to wasm.OpcodeCallIndirectName, and engines are expected to +// consume one value from the top of the stack (called "offset"), +// and make a function call against the function whose function address equals +// Tables[OperationCallIndirect.TableIndex][offset]. +// +// Note: This is called an indirect function call in the sense that the target function is indirectly +// determined by the current state (top value) of the stack. +// Therefore, two checks are performed at runtime before entering the target function: +// 1) whether "offset" exceeds the length of table Tables[OperationCallIndirect.TableIndex]. +// 2) whether the type of the function table[offset] matches the function type specified by OperationCallIndirect.TypeIndex. type OperationCallIndirect struct { TypeIndex, TableIndex uint32 } -func (o *OperationCallIndirect) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationCallIndirect) Kind() OperationKind { return OperationKindCallIndirect } +// InclusiveRange is the range which spans across the value stack starting from the top to the bottom, and +// both boundaries are included in the range. +type InclusiveRange struct { + Start, End int +} + +// OperationDrop implements Operation. +// +// The engines are expected to discard the values selected by OperationDrop.Depth which +// starts from the top of the stack to the bottom. type OperationDrop struct { - // Depths spans across the uint64 value stack at runtime to be dopped by this operation. + // Depth spans across the uint64 value stack at runtime to be dropped by this operation.
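+ // For example, a Depth of &InclusiveRange{Start: 0, End: 2} drops the top three values.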
+// OperationCall implements Operation. +// +// This corresponds to wasm.OpcodeCallName, and engines are expected to +// enter the function whose index equals OperationCall.FunctionIndex. type OperationCall struct { FunctionIndex uint32 } -func (o *OperationCall) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationCall) Kind() OperationKind { return OperationKindCall } +// OperationCallIndirect implements Operation. +// +// This corresponds to wasm.OpcodeCallIndirectName, and engines are expected to +// consume one value from the top of the stack (called "offset"), +// and make a function call against the function whose function address equals +// Tables[OperationCallIndirect.TableIndex][offset]. +// +// Note: This is called an indirect function call in the sense that the target function is indirectly +// determined by the current state (top value) of the stack. +// Therefore, two checks are performed at runtime before entering the target function: +// 1) whether "offset" exceeds the length of the table Tables[OperationCallIndirect.TableIndex]. +// 2) whether the type of the function table[offset] matches the function type specified by OperationCallIndirect.TypeIndex. type OperationCallIndirect struct { TypeIndex, TableIndex uint32 } -func (o *OperationCallIndirect) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationCallIndirect) Kind() OperationKind { return OperationKindCallIndirect } +// InclusiveRange is the range which spans across the value stack starting from the top to the bottom, and +// both boundaries are included in the range. +type InclusiveRange struct { + Start, End int +} + +// OperationDrop implements Operation. +// +// The engines are expected to discard the values selected by OperationDrop.Depth which +// starts from the top of the stack to the bottom. type OperationDrop struct { - // Depths spans across the uint64 value stack at runtime to be dopped by this operation. + // Depth spans across the uint64 value stack at runtime to be dropped by this operation. Depth *InclusiveRange } -func (o *OperationDrop) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationDrop) Kind() OperationKind { return OperationKindDrop } +// OperationSelect implements Operation. +// +// This corresponds to wasm.OpcodeSelect. +// +// The engines are expected to pop three values, say [..., x2, x1, c]; then if the value "c" equals zero, +// "x1" is pushed back onto the stack; otherwise, "x2" is pushed back. type OperationSelect struct{} -func (o *OperationSelect) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationSelect) Kind() OperationKind { return OperationKindSelect }
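Since the operand order in that comment is easy to misread, a tiny illustrative helper over a uint64 value stack (hypothetical, not part of this change) makes the discipline explicit:

// applySelect pops [..., x2, x1, c] and pushes back x1 when c == 0, x2 otherwise.
func applySelect(stack []uint64) []uint64 {
	c, x1, x2 := stack[len(stack)-1], stack[len(stack)-2], stack[len(stack)-3]
	stack = stack[:len(stack)-3]
	if c == 0 {
		return append(stack, x1)
	}
	return append(stack, x2)
}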
+// OperationPick implements Operation. +// +// The engines are expected to copy the value pointed to by OperationPick.Depth, and push the +// copied value onto the top of the stack. type OperationPick struct { // Depth is the location of the pick target in the uint64 value stack at runtime. // If IsTargetVector=true, this points to the location of the lower 64-bits of the vector. @@ -624,10 +949,15 @@ type OperationPick struct { IsTargetVector bool } -func (o *OperationPick) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationPick) Kind() OperationKind { return OperationKindPick } +// OperationSwap implements Operation. +// +// The engines are expected to swap the top value of the stack and the one specified by +// OperationSwap.Depth. type OperationSwap struct { // Depth is the location of the pick target in the uint64 value stack at runtime. // If IsTargetVector=true, this points to the location of the lower 64-bits of the vector. @@ -635,19 +965,34 @@ type OperationSwap struct { IsTargetVector bool } -func (o *OperationSwap) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationSwap) Kind() OperationKind { return OperationKindSwap } +// OperationGlobalGet implements Operation. +// +// The engines are expected to read the global value specified by OperationGlobalGet.Index, +// and push the copy of the value onto the stack. +// +// See wasm.OpcodeGlobalGet. type OperationGlobalGet struct{ Index uint32 } -func (o *OperationGlobalGet) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationGlobalGet) Kind() OperationKind { return OperationKindGlobalGet } +// OperationGlobalSet implements Operation. +// +// The engines are expected to consume the value from the top of the stack, +// and write the value into the global specified by OperationGlobalSet.Index. +// +// See wasm.OpcodeGlobalSet. type OperationGlobalSet struct{ Index uint32 } -func (o *OperationGlobalSet) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationGlobalSet) Kind() OperationKind { return OperationKindGlobalSet } @@ -666,353 +1011,606 @@ type MemoryArg struct { Offset uint32 } +// OperationLoad implements Operation. +// +// This corresponds to wasm.OpcodeI32LoadName wasm.OpcodeI64LoadName wasm.OpcodeF32LoadName and wasm.OpcodeF64LoadName. +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationLoad struct { Type UnsignedType Arg *MemoryArg } -func (o *OperationLoad) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationLoad) Kind() OperationKind { return OperationKindLoad } +// OperationLoad8 implements Operation. +// +// This corresponds to wasm.OpcodeI32Load8SName wasm.OpcodeI32Load8UName wasm.OpcodeI64Load8SName wasm.OpcodeI64Load8UName. +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationLoad8 struct { Type SignedInt Arg *MemoryArg } -func (o *OperationLoad8) Kind() OperationKind { +// Kind implements Operation.Kind +func (OperationLoad8) Kind() OperationKind { return OperationKindLoad8 } +// OperationLoad16 implements Operation. +// +// This corresponds to wasm.OpcodeI32Load16SName wasm.OpcodeI32Load16UName wasm.OpcodeI64Load16SName wasm.OpcodeI64Load16UName. +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationLoad16 struct { Type SignedInt Arg *MemoryArg } -func (o *OperationLoad16) Kind() OperationKind { +// Kind implements Operation.Kind +func (OperationLoad16) Kind() OperationKind { return OperationKindLoad16 } +// OperationLoad32 implements Operation. +// +// This corresponds to wasm.OpcodeI64Load32SName wasm.OpcodeI64Load32UName. +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationLoad32 struct { Signed bool Arg *MemoryArg } -func (o *OperationLoad32) Kind() OperationKind { +// Kind implements Operation.Kind +func (OperationLoad32) Kind() OperationKind { return OperationKindLoad32 }
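The boundary check these comments keep referring to amounts to comparing the effective address plus the access size against the memory length. A sketch for a 32-bit load (hypothetical helper; each engine implements this in its own code):

import (
	"encoding/binary"
	"errors"
)

// loadU32 computes base+offset in 64 bits so the addition cannot wrap, then
// rejects any access whose last byte falls outside the memory.
func loadU32(mem []byte, base, offset uint32) (uint32, error) {
	ea := uint64(base) + uint64(offset)
	if ea+4 > uint64(len(mem)) {
		return 0, errors.New("out of bounds memory access")
	}
	return binary.LittleEndian.Uint32(mem[ea:]), nil
}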
+// OperationStore implements Operation. +// +// This corresponds to wasm.OpcodeI32StoreName wasm.OpcodeI64StoreName wasm.OpcodeF32StoreName wasm.OpcodeF64StoreName +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationStore struct { Type UnsignedType Arg *MemoryArg } -func (o *OperationStore) Kind() OperationKind { +// Kind implements Operation.Kind +func (*OperationStore) Kind() OperationKind { return OperationKindStore } +// OperationStore8 implements Operation. +// +// This corresponds to wasm.OpcodeI32Store8Name wasm.OpcodeI64Store8Name +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationStore8 struct { - // TODO: Semantically Type doesn't affect operation so consider deleting this field. - Type UnsignedInt - Arg *MemoryArg + Arg *MemoryArg } -func (o *OperationStore8) Kind() OperationKind { +// Kind implements Operation.Kind +func (OperationStore8) Kind() OperationKind { return OperationKindStore8 } +// OperationStore16 implements Operation. +// +// This corresponds to wasm.OpcodeI32Store16Name wasm.OpcodeI64Store16Name +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationStore16 struct { - // TODO: Semantically Type doesn't affect operation so consider deleting this field. - Type UnsignedInt - Arg *MemoryArg + Arg *MemoryArg } -func (o *OperationStore16) Kind() OperationKind { +// Kind implements Operation.Kind +func (OperationStore16) Kind() OperationKind { return OperationKindStore16 } +// OperationStore32 implements Operation. +// +// This corresponds to wasm.OpcodeI64Store32Name +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. type OperationStore32 struct { Arg *MemoryArg } // Kind implements Operation.Kind. -func (o *OperationStore32) Kind() OperationKind { +func (OperationStore32) Kind() OperationKind { return OperationKindStore32 } +// OperationMemorySize implements Operation. +// +// This corresponds to wasm.OpcodeMemorySize. +// +// The engines are expected to push the current size of the memory in pages onto the stack. type OperationMemorySize struct{} // Kind implements Operation.Kind. -func (o *OperationMemorySize) Kind() OperationKind { +func (OperationMemorySize) Kind() OperationKind { return OperationKindMemorySize } +// OperationMemoryGrow implements Operation. +// +// This corresponds to wasm.OpcodeMemoryGrow. +// +// The engines are expected to pop one value from the top of the stack, then +// execute wasm.MemoryInstance Grow with the value, and push the previous +// size of the memory in pages onto the stack. type OperationMemoryGrow struct{ Alignment uint64 } // Kind implements Operation.Kind. -func (o *OperationMemoryGrow) Kind() OperationKind { +func (OperationMemoryGrow) Kind() OperationKind { return OperationKindMemoryGrow }
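memory.grow's contract is worth spelling out: the popped value is a page delta, and the previous size in pages (or 0xffffffff on failure) is what gets pushed back. A sketch under the assumption of a 64KiB page and a byte-slice memory (hypothetical; wasm.MemoryInstance has its own Grow):

const wasmPageSize = 65536

// growMemory returns the previous size in pages, or 0xffffffff when the
// request would exceed the configured maximum, mirroring the Wasm spec.
func growMemory(mem *[]byte, deltaPages, maxPages uint32) uint32 {
	prev := uint32(len(*mem) / wasmPageSize)
	if uint64(prev)+uint64(deltaPages) > uint64(maxPages) {
		return 0xffffffff
	}
	*mem = append(*mem, make([]byte, int(deltaPages)*wasmPageSize)...)
	return prev
}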
+// OperationConstI32 implements Operation. +// +// This corresponds to wasm.OpcodeI32Const. type OperationConstI32 struct{ Value uint32 } // Kind implements Operation.Kind. -func (o *OperationConstI32) Kind() OperationKind { +func (OperationConstI32) Kind() OperationKind { return OperationKindConstI32 } +// OperationConstI64 implements Operation. +// +// This corresponds to wasm.OpcodeI64Const. type OperationConstI64 struct{ Value uint64 } // Kind implements Operation.Kind. -func (o *OperationConstI64) Kind() OperationKind { +func (OperationConstI64) Kind() OperationKind { return OperationKindConstI64 } +// OperationConstF32 implements Operation. +// +// This corresponds to wasm.OpcodeF32Const. type OperationConstF32 struct{ Value float32 } // Kind implements Operation.Kind. -func (o *OperationConstF32) Kind() OperationKind { +func (OperationConstF32) Kind() OperationKind { return OperationKindConstF32 } +// OperationConstF64 implements Operation. +// +// This corresponds to wasm.OpcodeF64Const. type OperationConstF64 struct{ Value float64 } // Kind implements Operation.Kind. -func (o *OperationConstF64) Kind() OperationKind { +func (OperationConstF64) Kind() OperationKind { return OperationKindConstF64 } +// OperationEq implements Operation. +// +// This corresponds to wasm.OpcodeI32EqName wasm.OpcodeI64EqName wasm.OpcodeF32EqName wasm.OpcodeF64EqName type OperationEq struct{ Type UnsignedType } // Kind implements Operation.Kind. -func (o *OperationEq) Kind() OperationKind { +func (OperationEq) Kind() OperationKind { return OperationKindEq } +// OperationNe implements Operation. +// +// This corresponds to wasm.OpcodeI32NeName wasm.OpcodeI64NeName wasm.OpcodeF32NeName wasm.OpcodeF64NeName type OperationNe struct{ Type UnsignedType } // Kind implements Operation.Kind. -func (o *OperationNe) Kind() OperationKind { +func (OperationNe) Kind() OperationKind { return OperationKindNe } +// OperationEqz implements Operation. +// +// This corresponds to wasm.OpcodeI32EqzName wasm.OpcodeI64EqzName type OperationEqz struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationEqz) Kind() OperationKind { +func (OperationEqz) Kind() OperationKind { return OperationKindEqz } +// OperationLt implements Operation. +// +// This corresponds to wasm.OpcodeI32LtS wasm.OpcodeI32LtU wasm.OpcodeI64LtS wasm.OpcodeI64LtU wasm.OpcodeF32Lt wasm.OpcodeF64Lt type OperationLt struct{ Type SignedType } // Kind implements Operation.Kind. -func (o *OperationLt) Kind() OperationKind { +func (OperationLt) Kind() OperationKind { return OperationKindLt } +// OperationGt implements Operation. +// +// This corresponds to wasm.OpcodeI32GtS wasm.OpcodeI32GtU wasm.OpcodeI64GtS wasm.OpcodeI64GtU wasm.OpcodeF32Gt wasm.OpcodeF64Gt type OperationGt struct{ Type SignedType } // Kind implements Operation.Kind. -func (o *OperationGt) Kind() OperationKind { +func (OperationGt) Kind() OperationKind { return OperationKindGt } +// OperationLe implements Operation. +// +// This corresponds to wasm.OpcodeI32LeS wasm.OpcodeI32LeU wasm.OpcodeI64LeS wasm.OpcodeI64LeU wasm.OpcodeF32Le wasm.OpcodeF64Le type OperationLe struct{ Type SignedType } // Kind implements Operation.Kind. -func (o *OperationLe) Kind() OperationKind { +func (OperationLe) Kind() OperationKind { return OperationKindLe } +// OperationGe implements Operation. +// +// This corresponds to wasm.OpcodeI32GeS wasm.OpcodeI32GeU wasm.OpcodeI64GeS wasm.OpcodeI64GeU wasm.OpcodeF32Ge wasm.OpcodeF64Ge type OperationGe struct{ Type SignedType } // Kind implements Operation.Kind. -func (o *OperationGe) Kind() OperationKind { +func (OperationGe) Kind() OperationKind { return OperationKindGe } +// OperationAdd implements Operation. +// +// This corresponds to wasm.OpcodeI32AddName wasm.OpcodeI64AddName wasm.OpcodeF32AddName wasm.OpcodeF64AddName. type OperationAdd struct{ Type UnsignedType } // Kind implements Operation.Kind. -func (o *OperationAdd) Kind() OperationKind { +func (OperationAdd) Kind() OperationKind { return OperationKindAdd } +// OperationSub implements Operation. +// +// This corresponds to wasm.OpcodeI32SubName wasm.OpcodeI64SubName wasm.OpcodeF32SubName wasm.OpcodeF64SubName. type OperationSub struct{ Type UnsignedType } // Kind implements Operation.Kind. -func (o *OperationSub) Kind() OperationKind { +func (OperationSub) Kind() OperationKind { return OperationKindSub } +// OperationMul implements Operation. +// +// This corresponds to wasm.OpcodeI32MulName wasm.OpcodeI64MulName wasm.OpcodeF32MulName wasm.OpcodeF64MulName. type OperationMul struct{ Type UnsignedType } // Kind implements Operation.Kind. -func (o *OperationMul) Kind() OperationKind { +func (OperationMul) Kind() OperationKind { return OperationKindMul }
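The comparison operations carry a SignedType because one wazeroir opcode covers both signednesses, leaving the interpretation to the engine at execution time. An illustrative 32-bit "less than" (hypothetical helper):

// i32Lt returns 1 or 0, reinterpreting the operands as signed only when asked.
func i32Lt(x, y uint32, signed bool) uint32 {
	var lt bool
	if signed {
		lt = int32(x) < int32(y)
	} else {
		lt = x < y
	}
	if lt {
		return 1
	}
	return 0
}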
+// OperationClz implements Operation. +// +// This corresponds to wasm.OpcodeI32ClzName wasm.OpcodeI64ClzName. +// +// The engines are expected to count up the leading zeros in the +// current top of the stack, and push the count result. +// For example, stack of [..., 0x00_ff_ff_ff] results in [..., 8]. type OperationClz struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationClz) Kind() OperationKind { +func (OperationClz) Kind() OperationKind { return OperationKindClz } +// OperationCtz implements Operation. +// +// This corresponds to wasm.OpcodeI32CtzName wasm.OpcodeI64CtzName. +// +// The engines are expected to count up the trailing zeros in the +// current top of the stack, and push the count result. +// For example, stack of [..., 0xff_ff_ff_00] results in [..., 8]. type OperationCtz struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationCtz) Kind() OperationKind { +func (OperationCtz) Kind() OperationKind { return OperationKindCtz } +// OperationPopcnt implements Operation. +// +// This corresponds to wasm.OpcodeI32PopcntName wasm.OpcodeI64PopcntName. +// +// The engines are expected to count up the number of set bits in the +// current top of the stack, and push the count result. +// For example, stack of [..., 0b00_00_00_11] results in [..., 2]. type OperationPopcnt struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationPopcnt) Kind() OperationKind { +func (OperationPopcnt) Kind() OperationKind { return OperationKindPopcnt }
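In a Go-based engine these three operations map directly onto math/bits, which is a handy way to double-check the examples in the comments above (illustrative only):

import "math/bits"

func i32Clz(v uint32) uint32    { return uint32(bits.LeadingZeros32(v)) }  // clz(0x00_ff_ff_ff) == 8
func i32Ctz(v uint32) uint32    { return uint32(bits.TrailingZeros32(v)) } // ctz(0xff_ff_ff_00) == 8
func i32Popcnt(v uint32) uint32 { return uint32(bits.OnesCount32(v)) }     // popcnt(0b11) == 2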
+// OperationDiv implements Operation. +// +// This corresponds to wasm.OpcodeI32DivS wasm.OpcodeI32DivU wasm.OpcodeI64DivS +// wasm.OpcodeI64DivU wasm.OpcodeF32Div wasm.OpcodeF64Div. type OperationDiv struct{ Type SignedType } // Kind implements Operation.Kind. -func (o *OperationDiv) Kind() OperationKind { +func (OperationDiv) Kind() OperationKind { return OperationKindDiv } +// OperationRem implements Operation. +// +// This corresponds to wasm.OpcodeI32RemS wasm.OpcodeI32RemU wasm.OpcodeI64RemS wasm.OpcodeI64RemU. +// +// The engines are expected to perform division on the top +// two values of integer type on the stack and put the remainder of the result +// onto the stack. For example, stack [..., 10, 3] results in [..., 1] where +// the quotient is discarded. type OperationRem struct{ Type SignedInt } // Kind implements Operation.Kind. -func (o *OperationRem) Kind() OperationKind { +func (OperationRem) Kind() OperationKind { return OperationKindRem } +// OperationAnd implements Operation. +// +// This corresponds to wasm.OpcodeI32AndName wasm.OpcodeI64AndName +// +// The engines are expected to perform the "And" operation on +// the top two values on the stack, and push the result. type OperationAnd struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationAnd) Kind() OperationKind { +func (OperationAnd) Kind() OperationKind { return OperationKindAnd } +// OperationOr implements Operation. +// +// This corresponds to wasm.OpcodeI32OrName wasm.OpcodeI64OrName +// +// The engines are expected to perform the "Or" operation on +// the top two values on the stack, and push the result. type OperationOr struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationOr) Kind() OperationKind { +func (OperationOr) Kind() OperationKind { return OperationKindOr } +// OperationXor implements Operation. +// +// This corresponds to wasm.OpcodeI32XorName wasm.OpcodeI64XorName +// +// The engines are expected to perform the "Xor" operation on +// the top two values on the stack, and push the result. type OperationXor struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationXor) Kind() OperationKind { +func (OperationXor) Kind() OperationKind { return OperationKindXor } +// OperationShl implements Operation. +// +// This corresponds to wasm.OpcodeI32ShlName wasm.OpcodeI64ShlName +// +// The engines are expected to perform the "Shl" operation on +// the top two values on the stack, and push the result. type OperationShl struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationShl) Kind() OperationKind { +func (OperationShl) Kind() OperationKind { return OperationKindShl } +// OperationShr implements Operation. +// +// This corresponds to wasm.OpcodeI32ShrSName wasm.OpcodeI32ShrUName wasm.OpcodeI64ShrSName wasm.OpcodeI64ShrUName +// +// If OperationShr.Type is a signed integer, the engines are expected to perform an arithmetic right shift +// on the top two values on the stack; otherwise, a logical right shift. type OperationShr struct{ Type SignedInt } // Kind implements Operation.Kind. -func (o *OperationShr) Kind() OperationKind { +func (OperationShr) Kind() OperationKind { return OperationKindShr }
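The arithmetic-versus-logical distinction in OperationShr corresponds to Go's shift on a signed versus unsigned operand; note that Wasm also takes the shift count modulo the bit width. A 32-bit sketch (hypothetical helper):

// i32Shr masks the count to 0..31 per the Wasm spec, then shifts with or
// without sign replication depending on the signedness of the operation.
func i32Shr(v, count uint32, signed bool) uint32 {
	count &= 31
	if signed {
		return uint32(int32(v) >> count)
	}
	return v >> count
}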
+// OperationRotl implements Operation. +// +// This corresponds to wasm.OpcodeI32RotlName wasm.OpcodeI64RotlName +// +// The engines are expected to perform the "Rotl" operation on +// the top two values on the stack, and push the result. type OperationRotl struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationRotl) Kind() OperationKind { +func (OperationRotl) Kind() OperationKind { return OperationKindRotl } +// OperationRotr implements Operation. +// +// This corresponds to wasm.OpcodeI32RotrName wasm.OpcodeI64RotrName +// +// The engines are expected to perform the "Rotr" operation on +// the top two values on the stack, and push the result. type OperationRotr struct{ Type UnsignedInt } // Kind implements Operation.Kind. -func (o *OperationRotr) Kind() OperationKind { +func (OperationRotr) Kind() OperationKind { return OperationKindRotr } +// OperationAbs implements Operation. +// +// This corresponds to wasm.OpcodeF32Abs wasm.OpcodeF64Abs type OperationAbs struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationAbs) Kind() OperationKind { +func (OperationAbs) Kind() OperationKind { return OperationKindAbs } +// OperationNeg implements Operation. +// +// This corresponds to wasm.OpcodeF32Neg wasm.OpcodeF64Neg type OperationNeg struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationNeg) Kind() OperationKind { +func (OperationNeg) Kind() OperationKind { return OperationKindNeg } +// OperationCeil implements Operation. +// +// This corresponds to wasm.OpcodeF32CeilName wasm.OpcodeF64CeilName type OperationCeil struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationCeil) Kind() OperationKind { +func (OperationCeil) Kind() OperationKind { return OperationKindCeil } +// OperationFloor implements Operation. +// +// This corresponds to wasm.OpcodeF32FloorName wasm.OpcodeF64FloorName type OperationFloor struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationFloor) Kind() OperationKind { +func (OperationFloor) Kind() OperationKind { return OperationKindFloor } +// OperationTrunc implements Operation. +// +// This corresponds to wasm.OpcodeF32TruncName wasm.OpcodeF64TruncName type OperationTrunc struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationTrunc) Kind() OperationKind { +func (OperationTrunc) Kind() OperationKind { return OperationKindTrunc } +// OperationNearest implements Operation. +// +// This corresponds to wasm.OpcodeF32NearestName wasm.OpcodeF64NearestName +// +// Note: this is *not* equivalent to math.Round and instead has the same +// semantics as LLVM's rint intrinsic. See https://llvm.org/docs/LangRef.html#llvm-rint-intrinsic. +// For example, math.Round(-4.5) produces -5 while we want to produce -4. type OperationNearest struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationNearest) Kind() OperationKind { +func (OperationNearest) Kind() OperationKind { return OperationKindNearest } +// OperationSqrt implements Operation. +// +// This corresponds to wasm.OpcodeF32SqrtName wasm.OpcodeF64SqrtName type OperationSqrt struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationSqrt) Kind() OperationKind { +func (OperationSqrt) Kind() OperationKind { return OperationKindSqrt } +// OperationMin implements Operation. +// +// This corresponds to wasm.OpcodeF32MinName wasm.OpcodeF64MinName +// +// The engines are expected to pop two values from the stack, and push back the minimum of +// these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 1.9]. +// +// Note: WebAssembly specifies that min/max must always return NaN if one of the values is NaN, +// which is behavior different from math.Min. type OperationMin struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationMin) Kind() OperationKind { +func (OperationMin) Kind() OperationKind { return OperationKindMin } +// OperationMax implements Operation. +// +// This corresponds to wasm.OpcodeF32MaxName wasm.OpcodeF64MaxName +// +// The engines are expected to pop two values from the stack, and push back the maximum of +// these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 100.1]. +// +// Note: WebAssembly specifies that min/max must always return NaN if one of the values is NaN, +// which is behavior different from math.Max. type OperationMax struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationMax) Kind() OperationKind { +func (OperationMax) Kind() OperationKind { return OperationKindMax }
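The NaN note on OperationMin/OperationMax is the part engines most often get wrong: a naive comparison silently prefers one operand when the other is NaN, because every comparison with NaN is false. A sketch of the spec-conforming shape (hypothetical; it also glosses over the -0/+0 ordering the spec additionally requires):

import "math"

func wasmMin(x, y float64) float64 {
	if math.IsNaN(x) || math.IsNaN(y) {
		return math.NaN() // NaN always wins, regardless of the other operand.
	}
	if x < y {
		return x
	}
	return y
}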
+// OperationCopysign implements Operation. +// +// This corresponds to wasm.OpcodeF32CopysignName wasm.OpcodeF64CopysignName +// +// The engines are expected to pop two float values from the stack, and copy the sign bit of +// the first-popped value to the last one. +// For example, stack [..., 1.213, -5.0] results in [..., -1.213]. type OperationCopysign struct{ Type Float } // Kind implements Operation.Kind. -func (o *OperationCopysign) Kind() OperationKind { +func (OperationCopysign) Kind() OperationKind { return OperationKindCopysign } +// OperationI32WrapFromI64 implements Operation. +// +// This corresponds to wasm.OpcodeI32WrapI64 and is equivalent to uint64(uint32(v)) in Go. +// +// The engines are expected to replace the 64-bit int on top of the stack +// with the corresponding 32-bit integer. type OperationI32WrapFromI64 struct{} // Kind implements Operation.Kind. -func (o *OperationI32WrapFromI64) Kind() OperationKind { +func (OperationI32WrapFromI64) Kind() OperationKind { return OperationKindI32WrapFromI64 } +// OperationITruncFromF implements Operation. +// +// This corresponds to +// wasm.OpcodeI32TruncF32SName wasm.OpcodeI32TruncF32UName wasm.OpcodeI32TruncF64SName +// wasm.OpcodeI32TruncF64UName wasm.OpcodeI64TruncF32SName wasm.OpcodeI64TruncF32UName wasm.OpcodeI64TruncF64SName +// wasm.OpcodeI64TruncF64UName wasm.OpcodeI32TruncSatF32SName wasm.OpcodeI32TruncSatF32UName +// wasm.OpcodeI32TruncSatF64SName wasm.OpcodeI32TruncSatF64UName wasm.OpcodeI64TruncSatF32SName +// wasm.OpcodeI64TruncSatF32UName wasm.OpcodeI64TruncSatF64SName wasm.OpcodeI64TruncSatF64UName. +// +// See [1] and [2] for the cases which are undefined behavior in the WebAssembly specification when OperationITruncFromF.NonTrapping == false. +// To summarize, if the source float value is NaN or doesn't fit in the destination range of integers (incl. ±Inf), +// then the runtime behavior is undefined. In wazero, the engines are expected to exit the execution in these undefined cases with +// the wasmruntime.ErrRuntimeInvalidConversionToInteger error. +// +// [1] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-umathrmtruncmathsfu_m-n-z for unsigned integers. +// [2] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-smathrmtruncmathsfs_m-n-z for signed integers. type OperationITruncFromF struct { InputType Float OutputType SignedInt @@ -1022,103 +1620,164 @@ type OperationITruncFromF struct { } // Kind implements Operation.Kind. -func (o *OperationITruncFromF) Kind() OperationKind { +func (OperationITruncFromF) Kind() OperationKind { return OperationKindITruncFromF }
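The trapping-versus-saturating split described above can be made concrete for the f64-to-i32 case; the error returns stand in for the runtime errors the engines raise (hypothetical sketch):

import (
	"errors"
	"math"
)

// truncF64ToI32 truncates toward zero, then either saturates (NonTrapping)
// or reports the two undefined-behavior cases the comment above lists.
func truncF64ToI32(v float64, nonTrapping bool) (int32, error) {
	if math.IsNaN(v) {
		if nonTrapping {
			return 0, nil // the saturating form maps NaN to 0.
		}
		return 0, errors.New("invalid conversion to integer")
	}
	t := math.Trunc(v)
	if t < math.MinInt32 || t > math.MaxInt32 {
		if nonTrapping { // saturate to the nearest representable bound.
			if t < 0 {
				return math.MinInt32, nil
			}
			return math.MaxInt32, nil
		}
		return 0, errors.New("integer overflow")
	}
	return int32(t), nil
}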
+// OperationFConvertFromI implements Operation. +// +// This corresponds to +// wasm.OpcodeF32ConvertI32SName wasm.OpcodeF32ConvertI32UName wasm.OpcodeF32ConvertI64SName wasm.OpcodeF32ConvertI64UName +// wasm.OpcodeF64ConvertI32SName wasm.OpcodeF64ConvertI32UName wasm.OpcodeF64ConvertI64SName wasm.OpcodeF64ConvertI64UName +// and is equivalent to float32(uint32(x)), float32(int32(x)), etc. in Go. type OperationFConvertFromI struct { InputType SignedInt OutputType Float } // Kind implements Operation.Kind. -func (o *OperationFConvertFromI) Kind() OperationKind { +func (OperationFConvertFromI) Kind() OperationKind { return OperationKindFConvertFromI } +// OperationF32DemoteFromF64 implements Operation. +// +// This corresponds to wasm.OpcodeF32DemoteF64 and is equivalent to float32(float64(v)). type OperationF32DemoteFromF64 struct{} // Kind implements Operation.Kind. -func (o *OperationF32DemoteFromF64) Kind() OperationKind { +func (OperationF32DemoteFromF64) Kind() OperationKind { return OperationKindF32DemoteFromF64 } +// OperationF64PromoteFromF32 implements Operation. +// +// This corresponds to wasm.OpcodeF64PromoteF32 and is equivalent to float64(float32(v)). type OperationF64PromoteFromF32 struct{} // Kind implements Operation.Kind. -func (o *OperationF64PromoteFromF32) Kind() OperationKind { +func (OperationF64PromoteFromF32) Kind() OperationKind { return OperationKindF64PromoteFromF32 } +// OperationI32ReinterpretFromF32 implements Operation. +// +// This corresponds to wasm.OpcodeI32ReinterpretF32Name. type OperationI32ReinterpretFromF32 struct{} // Kind implements Operation.Kind. -func (o *OperationI32ReinterpretFromF32) Kind() OperationKind { +func (OperationI32ReinterpretFromF32) Kind() OperationKind { return OperationKindI32ReinterpretFromF32 } +// OperationI64ReinterpretFromF64 implements Operation. +// +// This corresponds to wasm.OpcodeI64ReinterpretF64Name. type OperationI64ReinterpretFromF64 struct{} // Kind implements Operation.Kind. -func (o *OperationI64ReinterpretFromF64) Kind() OperationKind { +func (OperationI64ReinterpretFromF64) Kind() OperationKind { return OperationKindI64ReinterpretFromF64 } +// OperationF32ReinterpretFromI32 implements Operation. +// +// This corresponds to wasm.OpcodeF32ReinterpretI32Name. type OperationF32ReinterpretFromI32 struct{} // Kind implements Operation.Kind. -func (o *OperationF32ReinterpretFromI32) Kind() OperationKind { +func (OperationF32ReinterpretFromI32) Kind() OperationKind { return OperationKindF32ReinterpretFromI32 } +// OperationF64ReinterpretFromI64 implements Operation. +// +// This corresponds to wasm.OpcodeF64ReinterpretI64Name. type OperationF64ReinterpretFromI64 struct{} // Kind implements Operation.Kind. -func (o *OperationF64ReinterpretFromI64) Kind() OperationKind { +func (OperationF64ReinterpretFromI64) Kind() OperationKind { return OperationKindF64ReinterpretFromI64 } +// OperationExtend implements Operation. +// +// This corresponds to wasm.OpcodeI64ExtendI32SName wasm.OpcodeI64ExtendI32UName +// +// The engines are expected to extend the 32-bit signed or unsigned int on top of the stack +// as a 64-bit integer of corresponding signedness. For the unsigned case, this is just reinterpreting the +// underlying bit pattern as a 64-bit integer. For the signed case, this is a sign-extension which preserves the +// original integer's sign. type OperationExtend struct{ Signed bool } -func (o *OperationExtend) Kind() OperationKind { +// Kind implements Operation.Kind. +func (OperationExtend) Kind() OperationKind { return OperationKindExtend } +// OperationSignExtend32From8 implements Operation. +// +// This corresponds to wasm.OpcodeI32Extend8SName. +// +// The engines are expected to sign-extend the first 8 bits of the 32-bit int as a signed 32-bit int. type OperationSignExtend32From8 struct{} // Kind implements Operation.Kind. -func (o *OperationSignExtend32From8) Kind() OperationKind { +func (OperationSignExtend32From8) Kind() OperationKind { return OperationKindSignExtend32From8 } +// OperationSignExtend32From16 implements Operation. +// +// This corresponds to wasm.OpcodeI32Extend16SName. +// +// The engines are expected to sign-extend the first 16 bits of the 32-bit int as a signed 32-bit int. type OperationSignExtend32From16 struct{} // Kind implements Operation.Kind. -func (o *OperationSignExtend32From16) Kind() OperationKind { +func (OperationSignExtend32From16) Kind() OperationKind { return OperationKindSignExtend32From16 } +// OperationSignExtend64From8 implements Operation. +// +// This corresponds to wasm.OpcodeI64Extend8SName. +// +// The engines are expected to sign-extend the first 8 bits of the 64-bit int as a signed 64-bit int. type OperationSignExtend64From8 struct{} // Kind implements Operation.Kind. -func (o *OperationSignExtend64From8) Kind() OperationKind { +func (OperationSignExtend64From8) Kind() OperationKind { return OperationKindSignExtend64From8 } +// OperationSignExtend64From16 implements Operation. +// +// This corresponds to wasm.OpcodeI64Extend16SName. +// +// The engines are expected to sign-extend the first 16 bits of the 64-bit int as a signed 64-bit int. type OperationSignExtend64From16 struct{} // Kind implements Operation.Kind. -func (o *OperationSignExtend64From16) Kind() OperationKind { +func (OperationSignExtend64From16) Kind() OperationKind { return OperationKindSignExtend64From16 } +// OperationSignExtend64From32 implements Operation. +// +// This corresponds to wasm.OpcodeI64Extend32SName. +// +// The engines are expected to sign-extend the first 32 bits of the 64-bit int as a signed 64-bit int. type OperationSignExtend64From32 struct{} // Kind implements Operation.Kind. -func (o *OperationSignExtend64From32) Kind() OperationKind { +func (OperationSignExtend64From32) Kind() OperationKind { return OperationKindSignExtend64From32 }
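Each of these sign-extension operations is, in Go terms, a narrowing conversion followed by a signed widening; the 32-from-8 case makes the pattern clear (illustrative):

// signExtend32From8 keeps only the low byte of v, then widens it back with
// its sign bit replicated, e.g. 0x0000_0080 becomes 0xffff_ff80.
func signExtend32From8(v uint32) uint32 {
	return uint32(int32(int8(v)))
}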
+// OperationMemoryInit implements Operation. +// +// This corresponds to wasm.OpcodeMemoryInitName. type OperationMemoryInit struct { // DataIndex is the index of the data instance in ModuleInstance.DataInstances // by which this operation instantiates a part of the memory. @@ -1126,10 +1785,13 @@ type OperationMemoryInit struct { } // Kind implements Operation.Kind. -func (o *OperationMemoryInit) Kind() OperationKind { +func (OperationMemoryInit) Kind() OperationKind { return OperationKindMemoryInit } +// OperationDataDrop implements Operation. +// +// This corresponds to wasm.OpcodeDataDropName. type OperationDataDrop struct { // DataIndex is the index of the data instance in ModuleInstance.DataInstances // which this operation drops. @@ -1137,24 +1799,33 @@ type OperationDataDrop struct { } // Kind implements Operation.Kind. -func (o *OperationDataDrop) Kind() OperationKind { +func (OperationDataDrop) Kind() OperationKind { return OperationKindDataDrop } +// OperationMemoryCopy implements Operation. +// +// This corresponds to wasm.OpcodeMemoryCopyName. type OperationMemoryCopy struct{} // Kind implements Operation.Kind. -func (o *OperationMemoryCopy) Kind() OperationKind { +func (OperationMemoryCopy) Kind() OperationKind { return OperationKindMemoryCopy } +// OperationMemoryFill implements Operation. +// +// This corresponds to wasm.OpcodeMemoryFillName. type OperationMemoryFill struct{} // Kind implements Operation.Kind. -func (o *OperationMemoryFill) Kind() OperationKind { +func (OperationMemoryFill) Kind() OperationKind { return OperationKindMemoryFill } +// OperationTableInit implements Operation. +// +// This corresponds to wasm.OpcodeTableInitName. type OperationTableInit struct { // ElemIndex is the index of the element by which this operation initializes a part of the table. ElemIndex uint32 @@ -1163,89 +1834,108 @@ type OperationTableInit struct { } // Kind implements Operation.Kind. -func (o *OperationTableInit) Kind() OperationKind { +func (OperationTableInit) Kind() OperationKind { return OperationKindTableInit } +// OperationElemDrop implements Operation. +// +// This corresponds to wasm.OpcodeElemDropName. type OperationElemDrop struct { // ElemIndex is the index of the element which this operation drops. ElemIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationElemDrop) Kind() OperationKind { +func (OperationElemDrop) Kind() OperationKind { return OperationKindElemDrop } +// OperationTableCopy implements Operation. +// +// This corresponds to wasm.OpcodeTableCopyName. type OperationTableCopy struct { SrcTableIndex, DstTableIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationTableCopy) Kind() OperationKind { +func (OperationTableCopy) Kind() OperationKind { return OperationKindTableCopy } -// OperationRefFunc corresponds to OpcodeRefFunc, and engines are expected to +// OperationRefFunc implements Operation. +// +// This corresponds to wasm.OpcodeRefFuncName, and engines are expected to // push the opaque pointer value of engine specific func for the given FunctionIndex. // -// OperationRefFunc implements Operation. +// Note: in wazero, we express any reference types (funcref or externref) as opaque pointers, which are uint64. +// Therefore, the engine implementations emit instructions to push the address of *function onto the stack.
type OperationRefFunc struct { FunctionIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationRefFunc) Kind() OperationKind { +func (OperationRefFunc) Kind() OperationKind { return OperationKindRefFunc } // OperationTableGet implements Operation. +// +// This corresponds to wasm.OpcodeTableGetName. type OperationTableGet struct { TableIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationTableGet) Kind() OperationKind { +func (OperationTableGet) Kind() OperationKind { return OperationKindTableGet } // OperationTableSet implements Operation. +// +// This corresponds to wasm.OpcodeTableSetName. type OperationTableSet struct { TableIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationTableSet) Kind() OperationKind { +func (OperationTableSet) Kind() OperationKind { return OperationKindTableSet } // OperationTableSize implements Operation. +// +// This corresponds to wasm.OpcodeTableSizeName. type OperationTableSize struct { TableIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationTableSize) Kind() OperationKind { +func (OperationTableSize) Kind() OperationKind { return OperationKindTableSize } // OperationTableGrow implements Operation. +// +// This corresponds to wasm.OpcodeTableGrowName. type OperationTableGrow struct { TableIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationTableGrow) Kind() OperationKind { +func (OperationTableGrow) Kind() OperationKind { return OperationKindTableGrow } // OperationTableFill implements Operation. +// +// This corresponds to wasm.OpcodeTableFillName. type OperationTableFill struct { TableIndex uint32 } // Kind implements Operation.Kind. -func (o *OperationTableFill) Kind() OperationKind { +func (OperationTableFill) Kind() OperationKind { return OperationKindTableFill } @@ -1255,7 +1945,9 @@ type OperationV128Const struct { } // Kind implements Operation.Kind. -func (o *OperationV128Const) Kind() OperationKind { +// +// This corresponds to wasm.OpcodeVecV128Const. +func (OperationV128Const) Kind() OperationKind { return OperationKindV128Const } @@ -1291,68 +1983,85 @@ func shapeName(s Shape) (ret string) { } // OperationV128Add implements Operation. +// +// This corresponds to wasm.OpcodeVecI8x16AddName wasm.OpcodeVecI16x8AddName wasm.OpcodeVecI32x4AddName +// wasm.OpcodeVecI64x2AddName wasm.OpcodeVecF32x4AddName wasm.OpcodeVecF64x2AddName type OperationV128Add struct { Shape Shape } // Kind implements Operation.Kind. -func (o *OperationV128Add) Kind() OperationKind { +func (OperationV128Add) Kind() OperationKind { return OperationKindV128Add } // OperationV128Sub implements Operation. +// +// This corresponds to wasm.OpcodeVecI8x16SubName wasm.OpcodeVecI16x8SubName wasm.OpcodeVecI32x4SubName +// wasm.OpcodeVecI64x2SubName wasm.OpcodeVecF32x4SubName wasm.OpcodeVecF64x2SubName type OperationV128Sub struct { Shape Shape } // Kind implements Operation.Kind. -func (o *OperationV128Sub) Kind() OperationKind { +func (OperationV128Sub) Kind() OperationKind { return OperationKindV128Sub } -type LoadV128Type = byte +// V128LoadType represents a type of wasm.OpcodeVecV128Load* instructions. +type V128LoadType = byte const ( - // LoadV128Type128 corresponds to wasm.OpcodeVecV128LoadName. - LoadV128Type128 LoadV128Type = iota - // LoadV128Type8x8s corresponds to wasm.OpcodeVecV128Load8x8SName. - LoadV128Type8x8s - // LoadV128Type8x8u corresponds to wasm.OpcodeVecV128Load8x8UName. 
- LoadV128Type8x8u - // LoadV128Type16x4s corresponds to wasm.OpcodeVecV128Load16x4SName - LoadV128Type16x4s - // LoadV128Type16x4u corresponds to wasm.OpcodeVecV128Load16x4UName - LoadV128Type16x4u - // LoadV128Type32x2s corresponds to wasm.OpcodeVecV128Load32x2SName - LoadV128Type32x2s - // LoadV128Type32x2u corresponds to wasm.OpcodeVecV128Load32x2UName - LoadV128Type32x2u - // LoadV128Type8Splat corresponds to wasm.OpcodeVecV128Load8SplatName - LoadV128Type8Splat - // LoadV128Type16Splat corresponds to wasm.OpcodeVecV128Load16SplatName - LoadV128Type16Splat - // LoadV128Type32Splat corresponds to wasm.OpcodeVecV128Load32SplatName - LoadV128Type32Splat - // LoadV128Type64Splat corresponds to wasm.OpcodeVecV128Load64SplatName - LoadV128Type64Splat - // LoadV128Type32zero corresponds to wasm.OpcodeVecV128Load32zeroName - LoadV128Type32zero - // LoadV128Type64zero corresponds to wasm.OpcodeVecV128Load64zeroName - LoadV128Type64zero + // V128LoadType128 corresponds to wasm.OpcodeVecV128LoadName. + V128LoadType128 V128LoadType = iota + // V128LoadType8x8s corresponds to wasm.OpcodeVecV128Load8x8SName. + V128LoadType8x8s + // V128LoadType8x8u corresponds to wasm.OpcodeVecV128Load8x8UName. + V128LoadType8x8u + // V128LoadType16x4s corresponds to wasm.OpcodeVecV128Load16x4SName + V128LoadType16x4s + // V128LoadType16x4u corresponds to wasm.OpcodeVecV128Load16x4UName + V128LoadType16x4u + // V128LoadType32x2s corresponds to wasm.OpcodeVecV128Load32x2SName + V128LoadType32x2s + // V128LoadType32x2u corresponds to wasm.OpcodeVecV128Load32x2UName + V128LoadType32x2u + // V128LoadType8Splat corresponds to wasm.OpcodeVecV128Load8SplatName + V128LoadType8Splat + // V128LoadType16Splat corresponds to wasm.OpcodeVecV128Load16SplatName + V128LoadType16Splat + // V128LoadType32Splat corresponds to wasm.OpcodeVecV128Load32SplatName + V128LoadType32Splat + // V128LoadType64Splat corresponds to wasm.OpcodeVecV128Load64SplatName + V128LoadType64Splat + // V128LoadType32zero corresponds to wasm.OpcodeVecV128Load32zeroName + V128LoadType32zero + // V128LoadType64zero corresponds to wasm.OpcodeVecV128Load64zeroName + V128LoadType64zero ) // OperationV128Load implements Operation. +// +// This corresponds to +// wasm.OpcodeVecV128LoadName wasm.OpcodeVecV128Load8x8SName wasm.OpcodeVecV128Load8x8UName +// wasm.OpcodeVecV128Load16x4SName wasm.OpcodeVecV128Load16x4UName wasm.OpcodeVecV128Load32x2SName +// wasm.OpcodeVecV128Load32x2UName wasm.OpcodeVecV128Load8SplatName wasm.OpcodeVecV128Load16SplatName +// wasm.OpcodeVecV128Load32SplatName wasm.OpcodeVecV128Load64SplatName wasm.OpcodeVecV128Load32zeroName +// wasm.OpcodeVecV128Load64zeroName type OperationV128Load struct { - Type LoadV128Type + Type V128LoadType Arg *MemoryArg } // Kind implements Operation.Kind. -func (o *OperationV128Load) Kind() OperationKind { +func (OperationV128Load) Kind() OperationKind { return OperationKindV128Load } // OperationV128LoadLane implements Operation. +// +// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName +// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. type OperationV128LoadLane struct { // LaneIndex is >=0 && <(128/LaneSize). LaneIndex byte @@ -1362,21 +2071,27 @@ type OperationV128LoadLane struct { } // Kind implements Operation.Kind. -func (o *OperationV128LoadLane) Kind() OperationKind { +func (OperationV128LoadLane) Kind() OperationKind { return OperationKindV128LoadLane } // OperationV128Store implements Operation. 
+// +// This corresponds to wasm.OpcodeVecV128StoreName. type OperationV128Store struct { Arg *MemoryArg } // Kind implements Operation.Kind. -func (o *OperationV128Store) Kind() OperationKind { +func (OperationV128Store) Kind() OperationKind { return OperationKindV128Store } // OperationV128StoreLane implements Operation. +// +// This corresponds to wasm.OpcodeVecV128Store8LaneName wasm.OpcodeVecV128Store16LaneName +// wasm.OpcodeVecV128Store32LaneName wasm.OpcodeVecV128Store64LaneName. type OperationV128StoreLane struct { // LaneIndex is >=0 && <(128/LaneSize). LaneIndex byte @@ -1386,11 +2101,17 @@ type OperationV128StoreLane struct { } // Kind implements Operation.Kind. -func (o *OperationV128StoreLane) Kind() OperationKind { +func (OperationV128StoreLane) Kind() OperationKind { return OperationKindV128StoreLane } // OperationV128ExtractLane implements Operation. +// +// This corresponds to +// wasm.OpcodeVecI8x16ExtractLaneSName wasm.OpcodeVecI8x16ExtractLaneUName +// wasm.OpcodeVecI16x8ExtractLaneSName wasm.OpcodeVecI16x8ExtractLaneUName +// wasm.OpcodeVecI32x4ExtractLaneName wasm.OpcodeVecI64x2ExtractLaneName +// wasm.OpcodeVecF32x4ExtractLaneName wasm.OpcodeVecF64x2ExtractLaneName. type OperationV128ExtractLane struct { // LaneIndex is >=0 && <M where shape = NxM.