Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

JIT: Added SVE LoadVector*NonFaultingZeroExtendTo* APIs #102860

Merged
merged 5 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,37 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
}

GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt);
if (intrinEmbMask.codeGenIsTableDriven())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will codeGenIsTableDriven always hold going forwards for additional intrinsics?

I wonder if it needs to be:

                        switch (intrinEmbMask.id)
                        {
                            case NI_Sve_LoadVectorByteNonFaultingZeroExtendToInt16:
                            case NI_Sve_LoadVectorByteNonFaultingZeroExtendToInt32:
                            ....
                            case NI_Sve_LoadVectorUInt32NonFaultingZeroExtendToInt64:
                            case NI_Sve_LoadVectorUInt32NonFaultingZeroExtendToUInt64:
                              GetEmitter()->emitIns_R_R_R_I ....

                            default:
                                GetEmitter()->emitIns_R_R_R .....
                                break;
                        }

Maybe for now it's fine.

Copy link
Contributor Author

@TIHan TIHan May 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will hold for additional intrinsics that do not have the HW_Flag_SpecialCodeGen flag, but have the HW_Flag_EmbeddedMaskedOperation flag.

{
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt);
}
else
{
switch (intrinEmbMask.id)
{
case NI_Sve_LoadVectorByteNonFaultingZeroExtendToInt16:
case NI_Sve_LoadVectorByteNonFaultingZeroExtendToInt32:
case NI_Sve_LoadVectorByteNonFaultingZeroExtendToInt64:
case NI_Sve_LoadVectorByteNonFaultingZeroExtendToUInt16:
case NI_Sve_LoadVectorByteNonFaultingZeroExtendToUInt32:
case NI_Sve_LoadVectorByteNonFaultingZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt16NonFaultingZeroExtendToInt32:
case NI_Sve_LoadVectorUInt16NonFaultingZeroExtendToInt64:
case NI_Sve_LoadVectorUInt16NonFaultingZeroExtendToUInt32:
case NI_Sve_LoadVectorUInt16NonFaultingZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32NonFaultingZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32NonFaultingZeroExtendToUInt64:
{
GetEmitter()->emitIns_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg,
0, opt);
break;
}

default:
unreached();
break;
}
}
break;
}

Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32,
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm missing something....

HW_Flag_SpecialCodeGen is set for these.
During codegen, genHWIntrinsic() is called with LoadVectorByteNonFaultingZeroExtendToInt16.
intrin.codeGenIsTableDriven() check fails (due to HW_Flag_SpecialCodeGen).
Code falls into the switch (intrin.id) at the end of the function.
Switch hits default: unreached() due to no LoadVectorByteNonFaultingZeroExtendToInt16 case

Copy link
Contributor Author

@TIHan TIHan May 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because of the HW_Flag_EmbeddedMaskedOperation flag, at the point of genHWIntrinsic, intrin will never be Sve_LoadVectorByteNonFaultingZeroExtendToInt16 and instead be Sve_ConditionalSelect that wraps Sve_LoadVectorByteNonFaultingZeroExtendToInt16. This is why I had to handle intrinEmbMask.codeGenIsTableDriven() like you saw.

HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16NonFaultingZeroExtendToUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32NonFaultingZeroExtendToUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, Load2xVectorAndUnzip, -1, 2, true, {INS_sve_ld2b, INS_sve_ld2b, INS_sve_ld2h, INS_sve_ld2h, INS_sve_ld2w, INS_sve_ld2w, INS_sve_ld2d, INS_sve_ld2d, INS_sve_ld2w, INS_sve_ld2d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Load3xVectorAndUnzip, -1, 2, true, {INS_sve_ld3b, INS_sve_ld3b, INS_sve_ld3h, INS_sve_ld3h, INS_sve_ld3w, INS_sve_ld3w, INS_sve_ld3d, INS_sve_ld3d, INS_sve_ld3w, INS_sve_ld3d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Load4xVectorAndUnzip, -1, 2, true, {INS_sve_ld4b, INS_sve_ld4b, INS_sve_ld4h, INS_sve_ld4h, INS_sve_ld4w, INS_sve_ld4w, INS_sve_ld4d, INS_sve_ld4d, INS_sve_ld4w, INS_sve_ld4d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters|HW_Flag_BaseTypeFromFirstArg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,78 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt32ZeroExtendToUInt64(Vector<ulong> mask, uint* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svldnf1ub_s16(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<short> LoadVectorByteNonFaultingZeroExtendToInt16(byte* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svint32_t svldnf1ub_s32(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<int> LoadVectorByteNonFaultingZeroExtendToInt32(byte* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svint64_t svldnf1ub_s64(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<long> LoadVectorByteNonFaultingZeroExtendToInt64(byte* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint16_t svldnf1ub_u16(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<ushort> LoadVectorByteNonFaultingZeroExtendToUInt16(byte* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint32_t svldnf1ub_u32(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<uint> LoadVectorByteNonFaultingZeroExtendToUInt32(byte* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint64_t svldnf1ub_u64(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<ulong> LoadVectorByteNonFaultingZeroExtendToUInt64(byte* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svint32_t svldnf1uh_s32(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<int> LoadVectorUInt16NonFaultingZeroExtendToInt32(ushort* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svint64_t svldnf1uh_s64(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<long> LoadVectorUInt16NonFaultingZeroExtendToInt64(ushort* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint32_t svldnf1uh_u32(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<uint> LoadVectorUInt16NonFaultingZeroExtendToUInt32(ushort* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint64_t svldnf1uh_u64(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<ulong> LoadVectorUInt16NonFaultingZeroExtendToUInt64(ushort* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svint64_t svldnf1uw_s64(svbool_t pg, const uint32_t *base)
/// LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<long> LoadVectorUInt32NonFaultingZeroExtendToInt64(uint* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint64_t svldnf1uw_u64(svbool_t pg, const uint32_t *base)
/// LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>SVE is unsupported on this platform; this stub always throws.</remarks>
public static unsafe Vector<ulong> LoadVectorUInt32NonFaultingZeroExtendToUInt64(uint* address) => throw new PlatformNotSupportedException();

/// <summary>
/// svuint8x2_t svld2[_u8](svbool_t pg, const uint8_t *base)
/// LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xbase, #0, MUL VL]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1766,6 +1766,78 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt32ZeroExtendToUInt64(Vector<ulong> mask, uint* address) => LoadVectorUInt32ZeroExtendToUInt64(mask, address);

/// <summary>
/// svint16_t svldnf1ub_s16(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<short> LoadVectorByteNonFaultingZeroExtendToInt16(byte* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1b in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorByteNonFaultingZeroExtendToInt16(address);
}

/// <summary>
/// svint32_t svldnf1ub_s32(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<int> LoadVectorByteNonFaultingZeroExtendToInt32(byte* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1b in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorByteNonFaultingZeroExtendToInt32(address);
}

/// <summary>
/// svint64_t svldnf1ub_s64(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorByteNonFaultingZeroExtendToInt64(byte* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1b in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorByteNonFaultingZeroExtendToInt64(address);
}

/// <summary>
/// svuint16_t svldnf1ub_u16(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ushort> LoadVectorByteNonFaultingZeroExtendToUInt16(byte* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1b in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorByteNonFaultingZeroExtendToUInt16(address);
}

/// <summary>
/// svuint32_t svldnf1ub_u32(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<uint> LoadVectorByteNonFaultingZeroExtendToUInt32(byte* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1b in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorByteNonFaultingZeroExtendToUInt32(address);
}

/// <summary>
/// svuint64_t svldnf1ub_u64(svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorByteNonFaultingZeroExtendToUInt64(byte* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1b in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorByteNonFaultingZeroExtendToUInt64(address);
}

/// <summary>
/// svint32_t svldnf1uh_s32(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<int> LoadVectorUInt16NonFaultingZeroExtendToInt32(ushort* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1h in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorUInt16NonFaultingZeroExtendToInt32(address);
}

/// <summary>
/// svint64_t svldnf1uh_s64(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorUInt16NonFaultingZeroExtendToInt64(ushort* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1h in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorUInt16NonFaultingZeroExtendToInt64(address);
}

/// <summary>
/// svuint32_t svldnf1uh_u32(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<uint> LoadVectorUInt16NonFaultingZeroExtendToUInt32(ushort* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1h in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorUInt16NonFaultingZeroExtendToUInt32(address);
}

/// <summary>
/// svuint64_t svldnf1uh_u64(svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt16NonFaultingZeroExtendToUInt64(ushort* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1h in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorUInt16NonFaultingZeroExtendToUInt64(address);
}

/// <summary>
/// svint64_t svldnf1uw_s64(svbool_t pg, const uint32_t *base)
/// LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorUInt32NonFaultingZeroExtendToInt64(uint* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1w in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorUInt32NonFaultingZeroExtendToInt64(address);
}

/// <summary>
/// svuint64_t svldnf1uw_u64(svbool_t pg, const uint32_t *base)
/// LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt32NonFaultingZeroExtendToUInt64(uint* address)
{
    // Recursive marker body: the JIT recognizes this intrinsic (mapped to
    // INS_sve_ldnf1w in the HARDWARE_INTRINSIC table) and emits the instruction.
    return LoadVectorUInt32NonFaultingZeroExtendToUInt64(address);
}

/// <summary>
/// svuint8x2_t svld2[_u8](svbool_t pg, const uint8_t *base)
/// LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xbase, #0, MUL VL]
Expand Down
Loading
Loading