Skip to content

Commit

Permalink
[mono] Basic SIMD support for System.Numerics.Vector2 on arm64 (#91659)
Browse files Browse the repository at this point in the history
* enable Basic SIMD support for Vector2 on arm64
* rename Vector2/3/4 methods table
  • Loading branch information
matouskozak authored Sep 21, 2023
1 parent 3551112 commit 09e796a
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 26 deletions.
23 changes: 16 additions & 7 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3932,14 +3932,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
break;

case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
arm_neon_eor_8b (code, dreg, dreg, dreg);
else
arm_neon_eor_16b (code, dreg, dreg, dreg);
break;
case OP_XONES:
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_not_16b (code, dreg, dreg);
break;
case OP_XEXTRACT:
code = emit_xextract (code, VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
code = emit_xextract (code, (ins->inst_c1 == 8) ? VREG_LOW : VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
break;
case OP_STOREX_MEMBASE:
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
Expand Down Expand Up @@ -3981,7 +3984,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_EXPAND_R4:
case OP_EXPAND_R8: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, 0);
if (ins->opcode == OP_EXPAND_R8)
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, 0);
else
arm_neon_fdup_e (code, get_vector_size_macro (ins), t, dreg, sreg1, 0);
break;
}
case OP_EXTRACT_I1:
Expand All @@ -4004,6 +4010,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
// Technically, this broadcasts element #inst_c0 to all dest XREG elements, whereas it should
// only set the FREG to that element. Since the FREG and XREG pools are the same on arm64 and the
// rest of the F/XREG is ignored in FREG mode, this operation remains valid.
// FIXME: pass VREG_LOW for 64-bit vectors
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, GTMREG_TO_UINT32 (ins->inst_c0));
}
break;
Expand Down Expand Up @@ -4098,17 +4105,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)

case OP_ARM64_XADDV: {
switch (ins->inst_c0) {
case INTRINS_AARCH64_ADV_SIMD_FADDV:
case INTRINS_AARCH64_ADV_SIMD_FADDV: {
const int width = get_vector_size_macro (ins);
if (ins->inst_c1 == MONO_TYPE_R8) {
arm_neon_faddp (code, VREG_FULL, TYPE_F64, dreg, sreg1, sreg1);
} else if (ins->inst_c1 == MONO_TYPE_R4) {
arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, sreg1, sreg1);
arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, dreg, dreg);
arm_neon_faddp (code, width, TYPE_F32, dreg, sreg1, sreg1);
if (width == VREG_FULL)
arm_neon_faddp (code, width, TYPE_F32, dreg, dreg, dreg);
} else {
g_assert_not_reached ();
}
break;

}
case INTRINS_AARCH64_ADV_SIMD_UADDV:
case INTRINS_AARCH64_ADV_SIMD_SADDV:
if (get_type_size_macro (ins->inst_c1) == TYPE_I64)
Expand Down
10 changes: 9 additions & 1 deletion src/mono/mono/mini/mini-runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -4469,7 +4469,7 @@ init_class (MonoClass *klass)

const char *name = m_class_get_name (klass);

#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM)
#if defined(TARGET_AMD64) || defined(TARGET_WASM)
/*
* Some of the intrinsics used by the VectorX classes are only implemented on amd64.
* The JIT can't handle SIMD types with != 16 size yet.
Expand All @@ -4481,6 +4481,14 @@ init_class (MonoClass *klass)
}
#endif

#ifdef TARGET_ARM64
if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) {
// FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501
if (!strcmp (name, "Vector2") || !strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane"))
mono_class_set_is_simd_type (klass, TRUE);
}
#endif

if (m_class_is_ginst (klass)) {
if (!strcmp (name, "Vector`1") || !strcmp (name, "Vector64`1") || !strcmp (name, "Vector128`1") || !strcmp (name, "Vector256`1") || !strcmp (name, "Vector512`1")) {
MonoGenericClass *gclass = mono_class_try_get_generic_class (klass);
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/mini/mini.c
Original file line number Diff line number Diff line change
Expand Up @@ -4573,6 +4573,9 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems)
if (!strcmp (klass_name, "Vector4") || !strcmp (klass_name, "Quaternion") || !strcmp (klass_name, "Plane")) {
*nelems = 4;
return MONO_TYPE_R4;
} else if (!strcmp (klass_name, "Vector2")) {
*nelems = 2;
return MONO_TYPE_R4;
} else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) {
MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
int size = mono_class_value_size (klass, NULL);
Expand Down
8 changes: 8 additions & 0 deletions src/mono/mono/mini/simd-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ SIMD_OP (64, OP_XCOMPARE_FP, CMP_LE, WTDSS_REV, _UNDEF

SIMD_OP (64, OP_XBINOP, OP_IADD, WTDSS, arm_neon_add, arm_neon_add, arm_neon_add, _UNDEF, _UNDEF, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FADD, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fadd, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FSUB, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsub, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FMAX, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmax, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FMIN, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmin, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FMUL, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmul, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FDIV, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fdiv, _UNDEF)
SIMD_OP (64, OP_ARM64_XADDV, INTRINS_AARCH64_ADV_SIMD_FADDV, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, _SKIP, _UNDEF)
SIMD_OP (64, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FSQRT, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsqrt, _UNDEF)
SIMD_OP (64, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FABS, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fabs, _UNDEF)

/* 128-bit vectors */
/* Width Opcode Function Operand config I8 I16 I32 I64 F32 F64 */
Expand Down
36 changes: 18 additions & 18 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1845,7 +1845,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
}

// FIXME: Add support for Vector64 on arm64
// FIXME: Add support for Vector64 on arm64 https://github.com/dotnet/runtime/issues/90402
int size = mono_class_value_size (arg_class, NULL);
if (size != 16)
return NULL;
Expand Down Expand Up @@ -2608,7 +2608,7 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
}

// System.Numerics.Vector2/Vector3/Vector4, Quaternion, and Plane
static guint16 vector2_methods[] = {
static guint16 vector_2_3_4_methods[] = {
SN_ctor,
SN_Abs,
SN_Add,
Expand Down Expand Up @@ -2651,12 +2651,12 @@ static G_GNUC_UNUSED MonoInst*
emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
MonoInst *ins;
int id;
int id, len;
MonoClass *klass;
MonoType *type, *etype;


id = lookup_intrins (vector2_methods, sizeof (vector2_methods), cmethod);
id = lookup_intrins (vector_2_3_4_methods, sizeof (vector_2_3_4_methods), cmethod);
if (id == -1) {
// https://github.com/dotnet/runtime/issues/81961
// check_no_intrinsic_cattr (cmethod);
Expand All @@ -2677,6 +2677,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
klass = cmethod->klass;
type = m_class_get_byval_arg (klass);
etype = m_class_get_byval_arg (mono_defaults.single_class);
len = mono_class_value_size (klass, NULL) / 4;

// Similar to the cases in emit_sys_numerics_vector_t ()
switch (id) {
Expand Down Expand Up @@ -2706,7 +2707,6 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
}
// FIXME: These don't work since Vector2/Vector3 are not handled as SIMD
#if 0
int len = mono_class_value_size (klass, NULL) / 4;
} else if (len == 3 && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type) {
/* Vector3 (Vector2, float) */
int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
Expand Down Expand Up @@ -2739,26 +2739,25 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
case SN_get_Item: {
// GetElement is marked as Intrinsic, but handling this in get_Item leads to better code
int src1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
int elems = 4;
MonoTypeEnum ty = etype->type;

if (args [1]->opcode == OP_ICONST) {
// If the index is provably a constant, we can generate vastly better code.
int index = GTMREG_TO_INT (args[1]->inst_c0);

if (index < 0 || index >= elems) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
if (index < 0 || index >= len) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
}

int opcode = type_to_extract_op (ty);
ins = emit_simd_ins (cfg, klass, opcode, src1, -1);
ins->inst_c0 = args[1]->inst_c0;
ins->inst_c0 = index;
ins->inst_c1 = ty;
return ins;
}

MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");

if (COMPILE_LLVM (cfg)) {
Expand Down Expand Up @@ -2826,14 +2825,14 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
g_assert (fsig->hasthis && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_I4 && fsig->params [1]->type == MONO_TYPE_R4);

gboolean indirect = FALSE;
int elems = 4, index = GTMREG_TO_INT (args [1]->inst_c0);
int index = GTMREG_TO_INT (args [1]->inst_c0);
int dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);

if (args [1]->opcode == OP_ICONST) {
// If the index is provably a constant, we can generate vastly better code.
// Bounds check only if the index is out of range
if (index < 0 || index >= elems) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
if (index < 0 || index >= len) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
}

Expand All @@ -2850,7 +2849,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
return ins;
}

MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");

if (COMPILE_LLVM (cfg)) {
Expand Down Expand Up @@ -2968,7 +2967,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
#endif
}
case SN_CopyTo:
// FIXME:
// FIXME: https://github.com/dotnet/runtime/issues/91394
return NULL;
case SN_Clamp: {
if (!(!fsig->hasthis && fsig->param_count == 3 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type) && mono_metadata_type_equal (fsig->params [2], type)))
Expand All @@ -2991,7 +2990,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
case SN_LengthSquared:
case SN_Lerp:
case SN_Normalize: {
// FIXME:
// FIXME: https://github.com/dotnet/runtime/issues/91394
return NULL;
}
default:
Expand Down Expand Up @@ -5896,8 +5895,9 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom
}

if (!strcmp (class_ns, "System.Numerics")) {
// FIXME: Support Vector2/Vector3
if (!strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
// FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501
if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector4") ||
!strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
return emit_vector_2_3_4 (cfg, cmethod, fsig, args);
}

Expand Down

0 comments on commit 09e796a

Please sign in to comment.