[API Proposal]: Arm64: FEAT_SVE: loads #94006
Tagging subscribers to this area: @dotnet/area-system-numerics

namespace System.Runtime.Intrinsics.Arm
/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: loads
{
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeVectorAddresses16(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeVectorAddresses16(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeVectorAddresses32(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeVectorAddresses32(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeVectorAddresses64(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeVectorAddresses64(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeVectorAddresses8(Vector<T> bases, Vector<T2> offsets); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeVectorAddresses8(Vector<T> bases, Vector<T> offsets); // ADR
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> Load128ReplicateToVector(const T *base); // LD1RQW or LD1RQD or LD1RQB or LD1RQH
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector(const T *base); // LD1W or LD1D or LD1B or LD1H
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector(const T *base, long vnum); // LD1W or LD1D or LD1B or LD1H
public static unsafe Vector<long> LoadVector(const long *base, long vnum);
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16SignExtend(const short *base); // LD1SH
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16SignExtend(const short *base, long vnum); // LD1SH
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16SignExtendNonFaulting(const short *base); // LDNF1SH
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16SignExtendNonFaulting(const short *base, long vnum); // LDNF1SH
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16ZeroExtend(const ushort *base); // LD1H
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16ZeroExtend(const ushort *base, long vnum); // LD1H
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16ZeroExtendNonFaulting(const ushort *base); // LDNF1H
/// T: int, long, uint, ulong
public static unsafe Vector<T> LoadVector16ZeroExtendNonFaulting(const ushort *base, long vnum); // LDNF1H
/// T: long, ulong
public static unsafe Vector<T> LoadVector32SignExtend(const int *base); // LD1SW
/// T: long, ulong
public static unsafe Vector<T> LoadVector32SignExtend(const int *base, long vnum); // LD1SW
/// T: long, ulong
public static unsafe Vector<T> LoadVector32SignExtendNonFaulting(const int *base); // LDNF1SW
/// T: long, ulong
public static unsafe Vector<T> LoadVector32SignExtendNonFaulting(const int *base, long vnum); // LDNF1SW
/// T: long, ulong
public static unsafe Vector<T> LoadVector32ZeroExtend(const uint *base); // LD1W
/// T: long, ulong
public static unsafe Vector<T> LoadVector32ZeroExtend(const uint *base, long vnum); // LD1W
/// T: long, ulong
public static unsafe Vector<T> LoadVector32ZeroExtendNonFaulting(const uint *base); // LDNF1W
/// T: long, ulong
public static unsafe Vector<T> LoadVector32ZeroExtendNonFaulting(const uint *base, long vnum); // LDNF1W
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8SignExtend(const sbyte *base); // LD1SB
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8SignExtend(const sbyte *base, long vnum); // LD1SB
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8SignExtendNonFaulting(const sbyte *base); // LDNF1SB
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8SignExtendNonFaulting(const sbyte *base, long vnum); // LDNF1SB
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8ZeroExtend(const byte *base); // LD1B
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8ZeroExtend(const byte *base, long vnum); // LD1B
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8ZeroExtendNonFaulting(const byte *base); // LDNF1B
/// T: short, int, long, ushort, uint, ulong
public static unsafe Vector<T> LoadVector8ZeroExtendNonFaulting(const byte *base, long vnum); // LDNF1B
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(const T *base); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(const T *base, long vnum); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
public static unsafe Vector<long> LoadVectorNonFaulting(const long *base, long vnum);
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonTemporal(const T *base); // LDNT1W or LDNT1D or LDNT1B or LDNT1H
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonTemporal(const T *base, long vnum); // LDNT1W or LDNT1D or LDNT1B or LDNT1H
public static unsafe Vector<long> LoadVectorNonTemporal(const long *base, long vnum);
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>) LoadVectorx2(const T *base); // LD2W or LD2D or LD2B or LD2H
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>) LoadVectorx2(const T *base, long vnum); // LD2W or LD2D or LD2B or LD2H
public static unsafe (Vector<long>, Vector<long>) LoadVectorx2(const long *base, long vnum);
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>) LoadVectorx3(const T *base); // LD3W or LD3D or LD3B or LD3H
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>) LoadVectorx3(const T *base, long vnum); // LD3W or LD3D or LD3B or LD3H
public static unsafe (Vector<long>, Vector<long>, Vector<long>) LoadVectorx3(const long *base, long vnum);
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>, Vector<T>) LoadVectorx4(const T *base); // LD4W or LD4D or LD4B or LD4H
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>, Vector<T>) LoadVectorx4(const T *base, long vnum); // LD4W or LD4D or LD4B or LD4H
public static unsafe (Vector<long>, Vector<long>, Vector<long>, Vector<long>) LoadVectorx4(const long *base, long vnum);
public static unsafe void PrefetchBytes(const void *base, enum svprfop op);
public static unsafe void PrefetchBytes(const void *base, long vnum, enum svprfop op);
public static unsafe void PrefetchDoublewords(const void *base, enum svprfop op);
public static unsafe void PrefetchDoublewords(const void *base, long vnum, enum svprfop op);
public static unsafe void PrefetchHalfwords(const void *base, enum svprfop op);
public static unsafe void PrefetchHalfwords(const void *base, long vnum, enum svprfop op);
public static unsafe void PrefetchWords(const void *base, enum svprfop op);
public static unsafe void PrefetchWords(const void *base, long vnum, enum svprfop op);
/// total method signatures: 59
}
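
A rough usage sketch of the summary shapes (illustrative only; the signatures above omit the governing predicate, and the scalar tail handling below is an assumption, not part of this proposal):

// using System.Numerics; using System.Runtime.Intrinsics.Arm;
static unsafe void AddBuffers(int* left, int* right, int* dest, int count)
{
    int step = Vector<int>.Count;                   // scalable element count, fixed at run time
    int i = 0;
    for (; i + step <= count; i += step)
    {
        Vector<int> a = Sve.LoadVector(left + i);   // LD1W
        Vector<int> b = Sve.LoadVector(right + i);  // LD1W
        (a + b).CopyTo(new Span<int>(dest + i, step));
    }
    for (; i < count; i++)                          // remainder, one element at a time
        dest[i] = left[i] + right[i];
}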
/// Full API
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: loads
{
/// ComputeByteAddresses : Compute vector addresses for 8-bit data
/// svuint32_t svadrb[_u32base]_[s32]offset(svuint32_t bases, svint32_t offsets) : "ADR Zresult.S, [Zbases.S, Zoffsets.S]"
public static unsafe Vector<uint> ComputeByteAddresses(Vector<uint> bases, Vector<int> offsets);
/// svuint32_t svadrb[_u32base]_[u32]offset(svuint32_t bases, svuint32_t offsets) : "ADR Zresult.S, [Zbases.S, Zoffsets.S]"
public static unsafe Vector<uint> ComputeByteAddresses(Vector<uint> bases, Vector<uint> offsets);
/// svuint64_t svadrb[_u64base]_[s64]offset(svuint64_t bases, svint64_t offsets) : "ADR Zresult.D, [Zbases.D, Zoffsets.D]"
public static unsafe Vector<ulong> ComputeByteAddresses(Vector<ulong> bases, Vector<long> offsets);
/// svuint64_t svadrb[_u64base]_[u64]offset(svuint64_t bases, svuint64_t offsets) : "ADR Zresult.D, [Zbases.D, Zoffsets.D]"
public static unsafe Vector<ulong> ComputeByteAddresses(Vector<ulong> bases, Vector<ulong> offsets);
/// ComputeInt16Addresses : Compute vector addresses for 16-bit data
/// svuint32_t svadrh[_u32base]_[s32]index(svuint32_t bases, svint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #1]"
public static unsafe Vector<uint> ComputeInt16Addresses(Vector<uint> bases, Vector<int> indices);
/// svuint32_t svadrh[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #1]"
public static unsafe Vector<uint> ComputeInt16Addresses(Vector<uint> bases, Vector<uint> indices);
/// svuint64_t svadrh[_u64base]_[s64]index(svuint64_t bases, svint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #1]"
public static unsafe Vector<ulong> ComputeInt16Addresses(Vector<ulong> bases, Vector<long> indices);
/// svuint64_t svadrh[_u64base]_[u64]index(svuint64_t bases, svuint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #1]"
public static unsafe Vector<ulong> ComputeInt16Addresses(Vector<ulong> bases, Vector<ulong> indices);
/// ComputeInt32Addresses : Compute vector addresses for 32-bit data
/// svuint32_t svadrw[_u32base]_[s32]index(svuint32_t bases, svint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #2]"
public static unsafe Vector<uint> ComputeInt32Addresses(Vector<uint> bases, Vector<int> indices);
/// svuint32_t svadrw[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #2]"
public static unsafe Vector<uint> ComputeInt32Addresses(Vector<uint> bases, Vector<uint> indices);
/// svuint64_t svadrw[_u64base]_[s64]index(svuint64_t bases, svint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #2]"
public static unsafe Vector<ulong> ComputeInt32Addresses(Vector<ulong> bases, Vector<long> indices);
/// svuint64_t svadrw[_u64base]_[u64]index(svuint64_t bases, svuint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #2]"
public static unsafe Vector<ulong> ComputeInt32Addresses(Vector<ulong> bases, Vector<ulong> indices);
/// ComputeInt64Addresses : Compute vector addresses for 64-bit data
/// svuint32_t svadrd[_u32base]_[s32]index(svuint32_t bases, svint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #3]"
public static unsafe Vector<uint> ComputeInt64Addresses(Vector<uint> bases, Vector<int> indices);
/// svuint32_t svadrd[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #3]"
public static unsafe Vector<uint> ComputeInt64Addresses(Vector<uint> bases, Vector<uint> indices);
/// svuint64_t svadrd[_u64base]_[s64]index(svuint64_t bases, svint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #3]"
public static unsafe Vector<ulong> ComputeInt64Addresses(Vector<ulong> bases, Vector<long> indices);
/// svuint64_t svadrd[_u64base]_[u64]index(svuint64_t bases, svuint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #3]"
public static unsafe Vector<ulong> ComputeInt64Addresses(Vector<ulong> bases, Vector<ulong> indices);
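/// Illustrative sketch (not part of the proposed surface): each ComputeXXXAddresses
/// overload is a per-lane base-plus-scaled-index computation matching the ADR form,
/// e.g. for 32-bit data:
///   Vector<uint> addrs = Sve.ComputeInt32Addresses(bases, indices);
///   // per lane: addrs[i] == bases[i] + (indices[i] << 2)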
/// LoadVector : Unextended load
/// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base) : "LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<float> LoadVector(Vector<float> mask, const float *base);
/// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base) : "LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<double> LoadVector(Vector<double> mask, const double *base);
/// svint8_t svld1[_s8](svbool_t pg, const int8_t *base) : "LD1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<sbyte> LoadVector(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svld1[_s16](svbool_t pg, const int16_t *base) : "LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVector(Vector<short> mask, const short *base);
/// svint32_t svld1[_s32](svbool_t pg, const int32_t *base) : "LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVector(Vector<int> mask, const int *base);
/// svint64_t svld1[_s64](svbool_t pg, const int64_t *base) : "LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVector(Vector<long> mask, const long *base);
/// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base) : "LD1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<byte> LoadVector(Vector<byte> mask, const byte *base);
/// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base) : "LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVector(Vector<ushort> mask, const ushort *base);
/// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base) : "LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVector(Vector<uint> mask, const uint *base);
/// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base) : "LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVector(Vector<ulong> mask, const ulong *base);
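/// Illustrative sketch (not part of the proposed surface): the mask is the governing
/// predicate, and predication is zeroing, so inactive lanes produce zero in the result:
///   Vector<int> data = Sve.LoadVector(mask, ptr); // LD1W; data[i] == ptr[i] where mask[i] is active, else 0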
/// LoadVector128AndReplicateToVector : Load and replicate 128 bits of data
/// svfloat32_t svld1rq[_f32](svbool_t pg, const float32_t *base) : "LD1RQW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1RQW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1RQW Zresult.S, Pg/Z, [Xbase, #0]"
public static unsafe Vector<float> LoadVector128AndReplicateToVector(Vector<float> mask, const float *base);
/// svfloat64_t svld1rq[_f64](svbool_t pg, const float64_t *base) : "LD1RQD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1RQD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1RQD Zresult.D, Pg/Z, [Xbase, #0]"
public static unsafe Vector<double> LoadVector128AndReplicateToVector(Vector<double> mask, const double *base);
/// svint8_t svld1rq[_s8](svbool_t pg, const int8_t *base) : "LD1RQB Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1RQB Zresult.B, Pg/Z, [Xarray, #index]" or "LD1RQB Zresult.B, Pg/Z, [Xbase, #0]"
public static unsafe Vector<sbyte> LoadVector128AndReplicateToVector(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svld1rq[_s16](svbool_t pg, const int16_t *base) : "LD1RQH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1RQH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1RQH Zresult.H, Pg/Z, [Xbase, #0]"
public static unsafe Vector<short> LoadVector128AndReplicateToVector(Vector<short> mask, const short *base);
/// svint32_t svld1rq[_s32](svbool_t pg, const int32_t *base) : "LD1RQW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1RQW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1RQW Zresult.S, Pg/Z, [Xbase, #0]"
public static unsafe Vector<int> LoadVector128AndReplicateToVector(Vector<int> mask, const int *base);
/// svint64_t svld1rq[_s64](svbool_t pg, const int64_t *base) : "LD1RQD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1RQD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1RQD Zresult.D, Pg/Z, [Xbase, #0]"
public static unsafe Vector<long> LoadVector128AndReplicateToVector(Vector<long> mask, const long *base);
/// svuint8_t svld1rq[_u8](svbool_t pg, const uint8_t *base) : "LD1RQB Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1RQB Zresult.B, Pg/Z, [Xarray, #index]" or "LD1RQB Zresult.B, Pg/Z, [Xbase, #0]"
public static unsafe Vector<byte> LoadVector128AndReplicateToVector(Vector<byte> mask, const byte *base);
/// svuint16_t svld1rq[_u16](svbool_t pg, const uint16_t *base) : "LD1RQH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1RQH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1RQH Zresult.H, Pg/Z, [Xbase, #0]"
public static unsafe Vector<ushort> LoadVector128AndReplicateToVector(Vector<ushort> mask, const ushort *base);
/// svuint32_t svld1rq[_u32](svbool_t pg, const uint32_t *base) : "LD1RQW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1RQW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1RQW Zresult.S, Pg/Z, [Xbase, #0]"
public static unsafe Vector<uint> LoadVector128AndReplicateToVector(Vector<uint> mask, const uint *base);
/// svuint64_t svld1rq[_u64](svbool_t pg, const uint64_t *base) : "LD1RQD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1RQD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1RQD Zresult.D, Pg/Z, [Xbase, #0]"
public static unsafe Vector<ulong> LoadVector128AndReplicateToVector(Vector<ulong> mask, const ulong *base);
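/// Illustrative sketch (not part of the proposed surface): LD1RQ* reads a single
/// 128-bit quadword and repeats it across the whole scalable register, which suits
/// broadcasting a small coefficient table:
///   Vector<float> coeffs = Sve.LoadVector128AndReplicateToVector(mask, table);
///   // every 128-bit block of coeffs holds table[0..3]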
/// LoadVectorByteSignExtendNonFaultingToInt16 : Load 8-bit data and sign-extend, non-faulting
/// svint16_t svldnf1sb_s16(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteSignExtendNonFaultingToInt16(Vector<short> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToInt32 : Load 8-bit data and sign-extend, non-faulting
/// svint32_t svldnf1sb_s32(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteSignExtendNonFaultingToInt32(Vector<int> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToInt64 : Load 8-bit data and sign-extend, non-faulting
/// svint64_t svldnf1sb_s64(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteSignExtendNonFaultingToInt64(Vector<long> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToUInt16 : Load 8-bit data and sign-extend, non-faulting
/// svuint16_t svldnf1sb_u16(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteSignExtendNonFaultingToUInt16(Vector<ushort> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToUInt32 : Load 8-bit data and sign-extend, non-faulting
/// svuint32_t svldnf1sb_u32(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteSignExtendNonFaultingToUInt32(Vector<uint> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToUInt64 : Load 8-bit data and sign-extend, non-faulting
/// svuint64_t svldnf1sb_u64(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteSignExtendNonFaultingToUInt64(Vector<ulong> mask, const sbyte *base);
/// LoadVectorByteSignExtendToInt16 : Load 8-bit data and sign-extend
/// svint16_t svld1sb_s16(svbool_t pg, const int8_t *base) : "LD1SB Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteSignExtendToInt16(Vector<short> mask, const sbyte *base);
/// LoadVectorByteSignExtendToInt32 : Load 8-bit data and sign-extend
/// svint32_t svld1sb_s32(svbool_t pg, const int8_t *base) : "LD1SB Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteSignExtendToInt32(Vector<int> mask, const sbyte *base);
/// LoadVectorByteSignExtendToInt64 : Load 8-bit data and sign-extend
/// svint64_t svld1sb_s64(svbool_t pg, const int8_t *base) : "LD1SB Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(Vector<long> mask, const sbyte *base);
/// LoadVectorByteSignExtendToUInt16 : Load 8-bit data and sign-extend
/// svuint16_t svld1sb_u16(svbool_t pg, const int8_t *base) : "LD1SB Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(Vector<ushort> mask, const sbyte *base);
/// LoadVectorByteSignExtendToUInt32 : Load 8-bit data and sign-extend
/// svuint32_t svld1sb_u32(svbool_t pg, const int8_t *base) : "LD1SB Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(Vector<uint> mask, const sbyte *base);
/// LoadVectorByteSignExtendToUInt64 : Load 8-bit data and sign-extend
/// svuint64_t svld1sb_u64(svbool_t pg, const int8_t *base) : "LD1SB Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(Vector<ulong> mask, const sbyte *base);
/// LoadVectorByteZeroExtendNonFaultingToInt16 : Load 8-bit data and zero-extend, non-faulting
/// svint16_t svldnf1ub_s16(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteZeroExtendNonFaultingToInt16(Vector<short> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToInt32 : Load 8-bit data and zero-extend, non-faulting
/// svint32_t svldnf1ub_s32(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteZeroExtendNonFaultingToInt32(Vector<int> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToInt64 : Load 8-bit data and zero-extend, non-faulting
/// svint64_t svldnf1ub_s64(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteZeroExtendNonFaultingToInt64(Vector<long> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToUInt16 : Load 8-bit data and zero-extend, non-faulting
/// svuint16_t svldnf1ub_u16(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteZeroExtendNonFaultingToUInt16(Vector<ushort> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToUInt32 : Load 8-bit data and zero-extend, non-faulting
/// svuint32_t svldnf1ub_u32(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteZeroExtendNonFaultingToUInt32(Vector<uint> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToUInt64 : Load 8-bit data and zero-extend, non-faulting
/// svuint64_t svldnf1ub_u64(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const byte *base);
/// LoadVectorByteZeroExtendToInt16 : Load 8-bit data and zero-extend
/// svint16_t svld1ub_s16(svbool_t pg, const uint8_t *base) : "LD1B Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(Vector<short> mask, const byte *base);
/// LoadVectorByteZeroExtendToInt32 : Load 8-bit data and zero-extend
/// svint32_t svld1ub_s32(svbool_t pg, const uint8_t *base) : "LD1B Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(Vector<int> mask, const byte *base);
/// LoadVectorByteZeroExtendToInt64 : Load 8-bit data and zero-extend
/// svint64_t svld1ub_s64(svbool_t pg, const uint8_t *base) : "LD1B Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(Vector<long> mask, const byte *base);
/// LoadVectorByteZeroExtendToUInt16 : Load 8-bit data and zero-extend
/// svuint16_t svld1ub_u16(svbool_t pg, const uint8_t *base) : "LD1B Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(Vector<ushort> mask, const byte *base);
/// LoadVectorByteZeroExtendToUInt32 : Load 8-bit data and zero-extend
/// svuint32_t svld1ub_u32(svbool_t pg, const uint8_t *base) : "LD1B Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(Vector<uint> mask, const byte *base);
/// LoadVectorByteZeroExtendToUInt64 : Load 8-bit data and zero-extend
/// svuint64_t svld1ub_u64(svbool_t pg, const uint8_t *base) : "LD1B Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(Vector<ulong> mask, const byte *base);
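/// Illustrative sketch (not part of the proposed surface): the extending loads read
/// narrow elements from memory and widen each to the destination lane size, so a
/// Vector<int> result consumes only Vector<int>.Count bytes of input. The 16-bit and
/// 32-bit groups below follow the same pattern at their own widths:
///   Vector<int> widened = Sve.LoadVectorByteSignExtendToInt32(mask, signedBytes); // LD1SB
///   // per active lane: widened[i] == (int)signedBytes[i]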
/// LoadVectorInt16SignExtendNonFaultingToInt32 : Load 16-bit data and sign-extend, non-faulting
/// svint32_t svldnf1sh_s32(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16SignExtendNonFaultingToInt32(Vector<int> mask, const short *base);
/// LoadVectorInt16SignExtendNonFaultingToInt64 : Load 16-bit data and sign-extend, non-faulting
/// svint64_t svldnf1sh_s64(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16SignExtendNonFaultingToInt64(Vector<long> mask, const short *base);
/// LoadVectorInt16SignExtendNonFaultingToUInt32 : Load 16-bit data and sign-extend, non-faulting
/// svuint32_t svldnf1sh_u32(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16SignExtendNonFaultingToUInt32(Vector<uint> mask, const short *base);
/// LoadVectorInt16SignExtendNonFaultingToUInt64 : Load 16-bit data and sign-extend, non-faulting
/// svuint64_t svldnf1sh_u64(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16SignExtendNonFaultingToUInt64(Vector<ulong> mask, const short *base);
/// LoadVectorInt16SignExtendToInt32 : Load 16-bit data and sign-extend
/// svint32_t svld1sh_s32(svbool_t pg, const int16_t *base) : "LD1SH Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(Vector<int> mask, const short *base);
/// LoadVectorInt16SignExtendToInt64 : Load 16-bit data and sign-extend
/// svint64_t svld1sh_s64(svbool_t pg, const int16_t *base) : "LD1SH Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(Vector<long> mask, const short *base);
/// LoadVectorInt16SignExtendToUInt32 : Load 16-bit data and sign-extend
/// svuint32_t svld1sh_u32(svbool_t pg, const int16_t *base) : "LD1SH Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(Vector<uint> mask, const short *base);
/// LoadVectorInt16SignExtendToUInt64 : Load 16-bit data and sign-extend
/// svuint64_t svld1sh_u64(svbool_t pg, const int16_t *base) : "LD1SH Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(Vector<ulong> mask, const short *base);
/// LoadVectorInt16ZeroExtendNonFaultingToInt32 : Load 16-bit data and zero-extend, non-faulting
/// svint32_t svldnf1uh_s32(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16ZeroExtendNonFaultingToInt32(Vector<int> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendNonFaultingToInt64 : Load 16-bit data and zero-extend, non-faulting
/// svint64_t svldnf1uh_s64(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16ZeroExtendNonFaultingToInt64(Vector<long> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendNonFaultingToUInt32 : Load 16-bit data and zero-extend, non-faulting
/// svuint32_t svldnf1uh_u32(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendNonFaultingToUInt32(Vector<uint> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendNonFaultingToUInt64 : Load 16-bit data and zero-extend, non-faulting
/// svuint64_t svldnf1uh_u64(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToInt32 : Load 16-bit data and zero-extend
/// svint32_t svld1uh_s32(svbool_t pg, const uint16_t *base) : "LD1H Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(Vector<int> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToInt64 : Load 16-bit data and zero-extend
/// svint64_t svld1uh_s64(svbool_t pg, const uint16_t *base) : "LD1H Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(Vector<long> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToUInt32 : Load 16-bit data and zero-extend
/// svuint32_t svld1uh_u32(svbool_t pg, const uint16_t *base) : "LD1H Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(Vector<uint> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToUInt64 : Load 16-bit data and zero-extend
/// svuint64_t svld1uh_u64(svbool_t pg, const uint16_t *base) : "LD1H Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(Vector<ulong> mask, const ushort *base);
/// LoadVectorInt32SignExtendNonFaultingToInt64 : Load 32-bit data and sign-extend, non-faulting
/// svint64_t svldnf1sw_s64(svbool_t pg, const int32_t *base) : "LDNF1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32SignExtendNonFaultingToInt64(Vector<long> mask, const int *base);
/// LoadVectorInt32SignExtendNonFaultingToUInt64 : Load 32-bit data and sign-extend, non-faulting
/// svuint64_t svldnf1sw_u64(svbool_t pg, const int32_t *base) : "LDNF1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32SignExtendNonFaultingToUInt64(Vector<ulong> mask, const int *base);
/// LoadVectorInt32SignExtendToInt64 : Load 32-bit data and sign-extend
/// svint64_t svld1sw_s64(svbool_t pg, const int32_t *base) : "LD1SW Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(Vector<long> mask, const int *base);
/// LoadVectorInt32SignExtendToUInt64 : Load 32-bit data and sign-extend
/// svuint64_t svld1sw_u64(svbool_t pg, const int32_t *base) : "LD1SW Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(Vector<ulong> mask, const int *base);
/// LoadVectorInt32ZeroExtendNonFaultingToInt64 : Load 32-bit data and zero-extend, non-faulting
/// svint64_t svldnf1uw_s64(svbool_t pg, const uint32_t *base) : "LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32ZeroExtendNonFaultingToInt64(Vector<long> mask, const uint *base);
/// LoadVectorInt32ZeroExtendNonFaultingToUInt64 : Load 32-bit data and zero-extend, non-faulting
/// svuint64_t svldnf1uw_u64(svbool_t pg, const uint32_t *base) : "LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const uint *base);
/// LoadVectorInt32ZeroExtendToInt64 : Load 32-bit data and zero-extend
/// svint64_t svld1uw_s64(svbool_t pg, const uint32_t *base) : "LD1W Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(Vector<long> mask, const uint *base);
/// LoadVectorInt32ZeroExtendToUInt64 : Load 32-bit data and zero-extend
/// svuint64_t svld1uw_u64(svbool_t pg, const uint32_t *base) : "LD1W Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(Vector<ulong> mask, const uint *base);
/// LoadVectorNonFaulting : Unextended load, non-faulting
/// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base) : "LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<float> LoadVectorNonFaulting(Vector<float> mask, const float *base);
/// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base) : "LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<double> LoadVectorNonFaulting(Vector<double> mask, const double *base);
/// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base) : "LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<sbyte> LoadVectorNonFaulting(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base) : "LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorNonFaulting(Vector<short> mask, const short *base);
/// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base) : "LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorNonFaulting(Vector<int> mask, const int *base);
/// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base) : "LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, const long *base);
/// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<byte> LoadVectorNonFaulting(Vector<byte> mask, const byte *base);
/// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorNonFaulting(Vector<ushort> mask, const ushort *base);
/// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base) : "LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorNonFaulting(Vector<uint> mask, const uint *base);
/// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base) : "LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorNonFaulting(Vector<ulong> mask, const ulong *base);
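/// Illustrative note (not part of the proposed surface): LDNF1* never raises a memory
/// fault; elements that could not be read are left as zero and the cumulative
/// first-fault register (FFR) records how far the load succeeded. Exposing and
/// querying the FFR is expected to be handled by a separate proposal.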
/// LoadVectorNonTemporal : Unextended load, non-temporal
/// svfloat32_t svldnt1[_f32](svbool_t pg, const float32_t *base) : "LDNT1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<float> LoadVectorNonTemporal(Vector<float> mask, const float *base);
/// svfloat64_t svldnt1[_f64](svbool_t pg, const float64_t *base) : "LDNT1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<double> LoadVectorNonTemporal(Vector<double> mask, const double *base);
/// svint8_t svldnt1[_s8](svbool_t pg, const int8_t *base) : "LDNT1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LDNT1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<sbyte> LoadVectorNonTemporal(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svldnt1[_s16](svbool_t pg, const int16_t *base) : "LDNT1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LDNT1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorNonTemporal(Vector<short> mask, const short *base);
/// svint32_t svldnt1[_s32](svbool_t pg, const int32_t *base) : "LDNT1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorNonTemporal(Vector<int> mask, const int *base);
/// svint64_t svldnt1[_s64](svbool_t pg, const int64_t *base) : "LDNT1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorNonTemporal(Vector<long> mask, const long *base);
/// svuint8_t svldnt1[_u8](svbool_t pg, const uint8_t *base) : "LDNT1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LDNT1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<byte> LoadVectorNonTemporal(Vector<byte> mask, const byte *base);
/// svuint16_t svldnt1[_u16](svbool_t pg, const uint16_t *base) : "LDNT1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LDNT1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorNonTemporal(Vector<ushort> mask, const ushort *base);
/// svuint32_t svldnt1[_u32](svbool_t pg, const uint32_t *base) : "LDNT1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorNonTemporal(Vector<uint> mask, const uint *base);
/// svuint64_t svldnt1[_u64](svbool_t pg, const uint64_t *base) : "LDNT1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorNonTemporal(Vector<ulong> mask, const ulong *base);
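/// Illustrative note (not part of the proposed surface): the non-temporal forms behave
/// like LoadVector but hint that the loaded data will not be reused soon, letting the
/// hardware limit cache pollution during streaming passes over large buffers.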
/// LoadVectorx2 : Load two-element tuples into two vectors
/// svfloat32x2_t svld2[_f32](svbool_t pg, const float32_t *base) : "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<float>, Vector<float>) LoadVectorx2(Vector<float> mask, const float *base);
/// svfloat64x2_t svld2[_f64](svbool_t pg, const float64_t *base) : "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<double>, Vector<double>) LoadVectorx2(Vector<double> mask, const double *base);
/// svint8x2_t svld2[_s8](svbool_t pg, const int8_t *base) : "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xarray, Xindex]" or "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<sbyte>, Vector<sbyte>) LoadVectorx2(Vector<sbyte> mask, const sbyte *base);
/// svint16x2_t svld2[_s16](svbool_t pg, const int16_t *base) : "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<short>, Vector<short>) LoadVectorx2(Vector<short> mask, const short *base);
/// svint32x2_t svld2[_s32](svbool_t pg, const int32_t *base) : "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<int>, Vector<int>) LoadVectorx2(Vector<int> mask, const int *base);
/// svint64x2_t svld2[_s64](svbool_t pg, const int64_t *base) : "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<long>, Vector<long>) LoadVectorx2(Vector<long> mask, const long *base);
/// svuint8x2_t svld2[_u8](svbool_t pg, const uint8_t *base) : "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xarray, Xindex]" or "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<byte>, Vector<byte>) LoadVectorx2(Vector<byte> mask, const byte *base);
/// svuint16x2_t svld2[_u16](svbool_t pg, const uint16_t *base) : "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ushort>, Vector<ushort>) LoadVectorx2(Vector<ushort> mask, const ushort *base);
/// svuint32x2_t svld2[_u32](svbool_t pg, const uint32_t *base) : "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<uint>, Vector<uint>) LoadVectorx2(Vector<uint> mask, const uint *base);
/// svuint64x2_t svld2[_u64](svbool_t pg, const uint64_t *base) : "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ulong>, Vector<ulong>) LoadVectorx2(Vector<ulong> mask, const ulong *base);
/// LoadVectorx3 : Load three-element tuples into three vectors
/// svfloat32x3_t svld3[_f32](svbool_t pg, const float32_t *base) : "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<float>, Vector<float>, Vector<float>) LoadVectorx3(Vector<float> mask, const float *base);
/// svfloat64x3_t svld3[_f64](svbool_t pg, const float64_t *base) : "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<double>, Vector<double>, Vector<double>) LoadVectorx3(Vector<double> mask, const double *base);
/// svint8x3_t svld3[_s8](svbool_t pg, const int8_t *base) : "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xarray, Xindex]" or "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx3(Vector<sbyte> mask, const sbyte *base);
/// svint16x3_t svld3[_s16](svbool_t pg, const int16_t *base) : "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<short>, Vector<short>, Vector<short>) LoadVectorx3(Vector<short> mask, const short *base);
/// svint32x3_t svld3[_s32](svbool_t pg, const int32_t *base) : "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<int>, Vector<int>, Vector<int>) LoadVectorx3(Vector<int> mask, const int *base);
/// svint64x3_t svld3[_s64](svbool_t pg, const int64_t *base) : "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<long>, Vector<long>, Vector<long>) LoadVectorx3(Vector<long> mask, const long *base);
/// svuint8x3_t svld3[_u8](svbool_t pg, const uint8_t *base) : "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xarray, Xindex]" or "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx3(Vector<byte> mask, const byte *base);
/// svuint16x3_t svld3[_u16](svbool_t pg, const uint16_t *base) : "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx3(Vector<ushort> mask, const ushort *base);
/// svuint32x3_t svld3[_u32](svbool_t pg, const uint32_t *base) : "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx3(Vector<uint> mask, const uint *base);
/// svuint64x3_t svld3[_u64](svbool_t pg, const uint64_t *base) : "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx3(Vector<ulong> mask, const ulong *base);
/// LoadVectorx4 : Load four-element tuples into four vectors
/// svfloat32x4_t svld4[_f32](svbool_t pg, const float32_t *base) : "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<float>, Vector<float>, Vector<float>, Vector<float>) LoadVectorx4(Vector<float> mask, const float *base);
/// svfloat64x4_t svld4[_f64](svbool_t pg, const float64_t *base) : "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<double>, Vector<double>, Vector<double>, Vector<double>) LoadVectorx4(Vector<double> mask, const double *base);
/// svint8x4_t svld4[_s8](svbool_t pg, const int8_t *base) : "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xarray, Xindex]" or "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx4(Vector<sbyte> mask, const sbyte *base);
/// svint16x4_t svld4[_s16](svbool_t pg, const int16_t *base) : "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<short>, Vector<short>, Vector<short>, Vector<short>) LoadVectorx4(Vector<short> mask, const short *base);
/// svint32x4_t svld4[_s32](svbool_t pg, const int32_t *base) : "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<int>, Vector<int>, Vector<int>, Vector<int>) LoadVectorx4(Vector<int> mask, const int *base);
/// svint64x4_t svld4[_s64](svbool_t pg, const int64_t *base) : "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<long>, Vector<long>, Vector<long>, Vector<long>) LoadVectorx4(Vector<long> mask, const long *base);
/// svuint8x4_t svld4[_u8](svbool_t pg, const uint8_t *base) : "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xarray, Xindex]" or "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx4(Vector<byte> mask, const byte *base);
/// svuint16x4_t svld4[_u16](svbool_t pg, const uint16_t *base) : "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx4(Vector<ushort> mask, const ushort *base);
/// svuint32x4_t svld4[_u32](svbool_t pg, const uint32_t *base) : "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx4(Vector<uint> mask, const uint *base);
/// svuint64x4_t svld4[_u64](svbool_t pg, const uint64_t *base) : "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx4(Vector<ulong> mask, const ulong *base);
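/// Illustrative sketch (not part of the proposed surface): the structured loads
/// de-interleave fixed-stride records into separate vectors, e.g. packed RGB bytes
/// via LD3B:
///   (Vector<byte> r, Vector<byte> g, Vector<byte> b) = Sve.LoadVectorx3(mask, rgb);
///   // r holds rgb[0], rgb[3], rgb[6], ...; g and b take the other two components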
/// PrefetchBytes : Prefetch bytes
/// void svprfb(svbool_t pg, const void *base, enum svprfop op) : "PRFB op, Pg, [Xarray, Xindex]" or "PRFB op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchBytes(Vector<byte> mask, const void *base, enum SvePrefetchType op);
/// PrefetchInt16 : Prefetch halfwords
/// void svprfh(svbool_t pg, const void *base, enum svprfop op) : "PRFH op, Pg, [Xarray, Xindex, LSL #1]" or "PRFH op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchInt16(Vector<ushort> mask, const void *base, enum SvePrefetchType op);
/// PrefetchInt32 : Prefetch words
/// void svprfw(svbool_t pg, const void *base, enum svprfop op) : "PRFW op, Pg, [Xarray, Xindex, LSL #2]" or "PRFW op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchInt32(Vector<uint> mask, const void *base, enum SvePrefetchType op);
/// PrefetchInt64 : Prefetch doublewords
/// void svprfd(svbool_t pg, const void *base, enum svprfop op) : "PRFD op, Pg, [Xarray, Xindex, LSL #3]" or "PRFD op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchInt64(Vector<ulong> mask, const void *base, enum SvePrefetchType op);
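/// Illustrative sketch (not part of the proposed surface): the element size only
/// scales the addressing; a streaming loop might prefetch a few vectors ahead
/// (SvePrefetchType.LoadL1Temporal is an assumed enum member):
///   Sve.PrefetchInt32(mask, ptr + 4 * Vector<uint>.Count, SvePrefetchType.LoadL1Temporal);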
/// total method signatures: 138
/// total method names: 63
}
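
A sketch of how the predicated overloads compose for loop tails (hedged: CreateWhileLessThanMask32Bit is assumed from the companion mask-creation proposal and is not part of this issue):

static unsafe long SumInts(int* src, int count)
{
    long total = 0;
    for (int i = 0; i < count; i += Vector<int>.Count)
    {
        Vector<int> mask = Sve.CreateWhileLessThanMask32Bit(i, count); // assumed helper: lanes i..count-1 active
        Vector<int> data = Sve.LoadVector(mask, src + i);              // LD1W with zeroing predication
        total += Vector.Sum(data);                                     // inactive lanes read as zero, so they add nothing
    }
    return total;
}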
/// Optional Entries:
/// public static unsafe Vector<float> LoadVector(Vector<float> mask, const float *base, long vnum); // svld1_vnum[_f32]
/// public static unsafe Vector<double> LoadVector(Vector<double> mask, const double *base, long vnum); // svld1_vnum[_f64]
/// public static unsafe Vector<sbyte> LoadVector(Vector<sbyte> mask, const sbyte *base, long vnum); // svld1_vnum[_s8]
/// public static unsafe Vector<short> LoadVector(Vector<short> mask, const short *base, long vnum); // svld1_vnum[_s16]
/// public static unsafe Vector<int> LoadVector(Vector<int> mask, const int *base, long vnum); // svld1_vnum[_s32]
/// public static unsafe Vector<long> LoadVector(Vector<long> mask, const long *base, long vnum); // svld1_vnum[_s64]
/// public static unsafe Vector<byte> LoadVector(Vector<byte> mask, const byte *base, long vnum); // svld1_vnum[_u8]
/// public static unsafe Vector<ushort> LoadVector(Vector<ushort> mask, const ushort *base, long vnum); // svld1_vnum[_u16]
/// public static unsafe Vector<uint> LoadVector(Vector<uint> mask, const uint *base, long vnum); // svld1_vnum[_u32]
/// public static unsafe Vector<ulong> LoadVector(Vector<ulong> mask, const ulong *base, long vnum); // svld1_vnum[_u64]
/// public static unsafe Vector<short> LoadVectorByteSignExtendNonFaultingToInt16(Vector<short> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteSignExtendNonFaultingToInt32(Vector<int> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteSignExtendNonFaultingToInt64(Vector<long> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteSignExtendNonFaultingToUInt16(Vector<ushort> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteSignExtendNonFaultingToUInt32(Vector<uint> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteSignExtendNonFaultingToUInt64(Vector<ulong> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_u64
/// public static unsafe Vector<short> LoadVectorByteSignExtendToInt16(Vector<short> mask, const sbyte *base, long vnum); // svld1sb_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteSignExtendToInt32(Vector<int> mask, const sbyte *base, long vnum); // svld1sb_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(Vector<long> mask, const sbyte *base, long vnum); // svld1sb_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(Vector<ushort> mask, const sbyte *base, long vnum); // svld1sb_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(Vector<uint> mask, const sbyte *base, long vnum); // svld1sb_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(Vector<ulong> mask, const sbyte *base, long vnum); // svld1sb_vnum_u64
/// public static unsafe Vector<short> LoadVectorByteZeroExtendNonFaultingToInt16(Vector<short> mask, const byte *base, long vnum); // svldnf1ub_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteZeroExtendNonFaultingToInt32(Vector<int> mask, const byte *base, long vnum); // svldnf1ub_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteZeroExtendNonFaultingToInt64(Vector<long> mask, const byte *base, long vnum); // svldnf1ub_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteZeroExtendNonFaultingToUInt16(Vector<ushort> mask, const byte *base, long vnum); // svldnf1ub_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteZeroExtendNonFaultingToUInt32(Vector<uint> mask, const byte *base, long vnum); // svldnf1ub_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const byte *base, long vnum); // svldnf1ub_vnum_u64
/// public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(Vector<short> mask, const byte *base, long vnum); // svld1ub_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(Vector<int> mask, const byte *base, long vnum); // svld1ub_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(Vector<long> mask, const byte *base, long vnum); // svld1ub_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(Vector<ushort> mask, const byte *base, long vnum); // svld1ub_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(Vector<uint> mask, const byte *base, long vnum); // svld1ub_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(Vector<ulong> mask, const byte *base, long vnum); // svld1ub_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16SignExtendNonFaultingToInt32(Vector<int> mask, const short *base, long vnum); // svldnf1sh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16SignExtendNonFaultingToInt64(Vector<long> mask, const short *base, long vnum); // svldnf1sh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16SignExtendNonFaultingToUInt32(Vector<uint> mask, const short *base, long vnum); // svldnf1sh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16SignExtendNonFaultingToUInt64(Vector<ulong> mask, const short *base, long vnum); // svldnf1sh_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(Vector<int> mask, const short *base, long vnum); // svld1sh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(Vector<long> mask, const short *base, long vnum); // svld1sh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(Vector<uint> mask, const short *base, long vnum); // svld1sh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(Vector<ulong> mask, const short *base, long vnum); // svld1sh_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16ZeroExtendNonFaultingToInt32(Vector<int> mask, const ushort *base, long vnum); // svldnf1uh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16ZeroExtendNonFaultingToInt64(Vector<long> mask, const ushort *base, long vnum); // svldnf1uh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16ZeroExtendNonFaultingToUInt32(Vector<uint> mask, const ushort *base, long vnum); // svldnf1uh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const ushort *base, long vnum); // svldnf1uh_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(Vector<int> mask, const ushort *base, long vnum); // svld1uh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(Vector<long> mask, const ushort *base, long vnum); // svld1uh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(Vector<uint> mask, const ushort *base, long vnum); // svld1uh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(Vector<ulong> mask, const ushort *base, long vnum); // svld1uh_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32SignExtendNonFaultingToInt64(Vector<long> mask, const int *base, long vnum); // svldnf1sw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32SignExtendNonFaultingToUInt64(Vector<ulong> mask, const int *base, long vnum); // svldnf1sw_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(Vector<long> mask, const int *base, long vnum); // svld1sw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(Vector<ulong> mask, const int *base, long vnum); // svld1sw_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32ZeroExtendNonFaultingToInt64(Vector<long> mask, const uint *base, long vnum); // svldnf1uw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const uint *base, long vnum); // svldnf1uw_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(Vector<long> mask, const uint *base, long vnum); // svld1uw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(Vector<ulong> mask, const uint *base, long vnum); // svld1uw_vnum_u64
/// public static unsafe Vector<float> LoadVectorNonFaulting(Vector<float> mask, const float *base, long vnum); // svldnf1_vnum[_f32]
/// public static unsafe Vector<double> LoadVectorNonFaulting(Vector<double> mask, const double *base, long vnum); // svldnf1_vnum[_f64]
/// public static unsafe Vector<sbyte> LoadVectorNonFaulting(Vector<sbyte> mask, const sbyte *base, long vnum); // svldnf1_vnum[_s8]
/// public static unsafe Vector<short> LoadVectorNonFaulting(Vector<short> mask, const short *base, long vnum); // svldnf1_vnum[_s16]
/// public static unsafe Vector<int> LoadVectorNonFaulting(Vector<int> mask, const int *base, long vnum); // svldnf1_vnum[_s32]
/// public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, const long *base, long vnum); // svldnf1_vnum[_s64]
/// public static unsafe Vector<byte> LoadVectorNonFaulting(Vector<byte> mask, const byte *base, long vnum); // svldnf1_vnum[_u8]
/// public static unsafe Vector<ushort> LoadVectorNonFaulting(Vector<ushort> mask, const ushort *base, long vnum); // svldnf1_vnum[_u16]
/// public static unsafe Vector<uint> LoadVectorNonFaulting(Vector<uint> mask, const uint *base, long vnum); // svldnf1_vnum[_u32]
/// public static unsafe Vector<ulong> LoadVectorNonFaulting(Vector<ulong> mask, const ulong *base, long vnum); // svldnf1_vnum[_u64]
/// public static unsafe Vector<float> LoadVectorNonTemporal(Vector<float> mask, const float *base, long vnum); // svldnt1_vnum[_f32]
/// public static unsafe Vector<double> LoadVectorNonTemporal(Vector<double> mask, const double *base, long vnum); // svldnt1_vnum[_f64]
/// public static unsafe Vector<sbyte> LoadVectorNonTemporal(Vector<sbyte> mask, const sbyte *base, long vnum); // svldnt1_vnum[_s8]
/// public static unsafe Vector<short> LoadVectorNonTemporal(Vector<short> mask, const short *base, long vnum); // svldnt1_vnum[_s16]
/// public static unsafe Vector<int> LoadVectorNonTemporal(Vector<int> mask, const int *base, long vnum); // svldnt1_vnum[_s32]
/// public static unsafe Vector<long> LoadVectorNonTemporal(Vector<long> mask, const long *base, long vnum); // svldnt1_vnum[_s64]
/// public static unsafe Vector<byte> LoadVectorNonTemporal(Vector<byte> mask, const byte *base, long vnum); // svldnt1_vnum[_u8]
/// public static unsafe Vector<ushort> LoadVectorNonTemporal(Vector<ushort> mask, const ushort *base, long vnum); // svldnt1_vnum[_u16]
/// public static unsafe Vector<uint> LoadVectorNonTemporal(Vector<uint> mask, const uint *base, long vnum); // svldnt1_vnum[_u32]
/// public static unsafe Vector<ulong> LoadVectorNonTemporal(Vector<ulong> mask, const ulong *base, long vnum); // svldnt1_vnum[_u64]
/// public static unsafe (Vector<float>, Vector<float>) LoadVectorx2(Vector<float> mask, const float *base, long vnum); // svld2_vnum[_f32]
/// public static unsafe (Vector<double>, Vector<double>) LoadVectorx2(Vector<double> mask, const double *base, long vnum); // svld2_vnum[_f64]
/// public static unsafe (Vector<sbyte>, Vector<sbyte>) LoadVectorx2(Vector<sbyte> mask, const sbyte *base, long vnum); // svld2_vnum[_s8]
/// public static unsafe (Vector<short>, Vector<short>) LoadVectorx2(Vector<short> mask, const short *base, long vnum); // svld2_vnum[_s16]
/// public static unsafe (Vector<int>, Vector<int>) LoadVectorx2(Vector<int> mask, const int *base, long vnum); // svld2_vnum[_s32]
/// public static unsafe (Vector<long>, Vector<long>) LoadVectorx2(Vector<long> mask, const long *base, long vnum); // svld2_vnum[_s64]
/// public static unsafe (Vector<byte>, Vector<byte>) LoadVectorx2(Vector<byte> mask, const byte *base, long vnum); // svld2_vnum[_u8]
/// public static unsafe (Vector<ushort>, Vector<ushort>) LoadVectorx2(Vector<ushort> mask, const ushort *base, long vnum); // svld2_vnum[_u16]
/// public static unsafe (Vector<uint>, Vector<uint>) LoadVectorx2(Vector<uint> mask, const uint *base, long vnum); // svld2_vnum[_u32]
/// public static unsafe (Vector<ulong>, Vector<ulong>) LoadVectorx2(Vector<ulong> mask, const ulong *base, long vnum); // svld2_vnum[_u64]
/// public static unsafe (Vector<float>, Vector<float>, Vector<float>) LoadVectorx3(Vector<float> mask, const float *base, long vnum); // svld3_vnum[_f32]
/// public static unsafe (Vector<double>, Vector<double>, Vector<double>) LoadVectorx3(Vector<double> mask, const double *base, long vnum); // svld3_vnum[_f64]
/// public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx3(Vector<sbyte> mask, const sbyte *base, long vnum); // svld3_vnum[_s8]
/// public static unsafe (Vector<short>, Vector<short>, Vector<short>) LoadVectorx3(Vector<short> mask, const short *base, long vnum); // svld3_vnum[_s16]
/// public static unsafe (Vector<int>, Vector<int>, Vector<int>) LoadVectorx3(Vector<int> mask, const int *base, long vnum); // svld3_vnum[_s32]
/// public static unsafe (Vector<long>, Vector<long>, Vector<long>) LoadVectorx3(Vector<long> mask, const long *base, long vnum); // svld3_vnum[_s64]
/// public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx3(Vector<byte> mask, const byte *base, long vnum); // svld3_vnum[_u8]
/// public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx3(Vector<ushort> mask, const ushort *base, long vnum); // svld3_vnum[_u16]
/// public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx3(Vector<uint> mask, const uint *base, long vnum); // svld3_vnum[_u32]
/// public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx3(Vector<ulong> mask, const ulong *base, long vnum); // svld3_vnum[_u64]
/// public static unsafe (Vector<float>, Vector<float>, Vector<float>, Vector<float>) LoadVectorx4(Vector<float> mask, const float *base, long vnum); // svld4_vnum[_f32]
/// public static unsafe (Vector<double>, Vector<double>, Vector<double>, Vector<double>) LoadVectorx4(Vector<double> mask, const double *base, long vnum); // svld4_vnum[_f64]
/// public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx4(Vector<sbyte> mask, const sbyte *base, long vnum); // svld4_vnum[_s8]
/// public static unsafe (Vector<short>, Vector<short>, Vector<short>, Vector<short>) LoadVectorx4(Vector<short> mask, const short *base, long vnum); // svld4_vnum[_s16]
/// public static unsafe (Vector<int>, Vector<int>, Vector<int>, Vector<int>) LoadVectorx4(Vector<int> mask, const int *base, long vnum); // svld4_vnum[_s32]
/// public static unsafe (Vector<long>, Vector<long>, Vector<long>, Vector<long>) LoadVectorx4(Vector<long> mask, const long *base, long vnum); // svld4_vnum[_s64]
/// public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx4(Vector<byte> mask, const byte *base, long vnum); // svld4_vnum[_u8]
/// public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx4(Vector<ushort> mask, const ushort *base, long vnum); // svld4_vnum[_u16]
/// public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx4(Vector<uint> mask, const uint *base, long vnum); // svld4_vnum[_u32]
/// public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx4(Vector<ulong> mask, const ulong *base, long vnum); // svld4_vnum[_u64]
/// public static unsafe void PrefetchBytes(Vector<byte> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfb_vnum
/// public static unsafe void PrefetchInt16(Vector<ushort> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfh_vnum
/// public static unsafe void PrefetchInt32(Vector<uint> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfw_vnum
/// public static unsafe void PrefetchInt64(Vector<ulong> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfd_vnum
/// Total Maybe: 112
/// Total ACLE covered across API: 250
This contributes to #93095. It covers instructions in FEAT_SVE related to loads. Note there are more load methods in the first-faulting and gather-loads proposals. This list was auto-generated from the C ACLE for SVE, and is in three parts. The methods list is reduced down to Vector versions; all possible variants of T are given above each method. Many of the C functions include predicate argument(s), of type svbool_t, as the first argument. These are missing from the C# methods; it is expected that the JIT will create predicates where required, or combine them with uses of ConditionalSelect(). For more discussion see the #88140 comment.
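As a rough sketch of that expectation (hypothetical: ConditionalSelect comes from the companion SVE proposals, the folding described here is expected JIT behaviour rather than a committed design, and mask and address are assumed locals):

// C ACLE: svint32_t svld1(svbool_t pg, const int32_t *base);
// The C# method drops the svbool_t argument; an all-true predicate is implied.
Vector<int> loaded = Sve.LoadVector(address); // LD1W with an all-true predicate
// A masked load is then expressed via ConditionalSelect, which the JIT is
// expected to fold into the load's governing predicate:
Vector<int> masked = Sve.ConditionalSelect(mask, Sve.LoadVector(address), Vector<int>.Zero);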
I wonder if we can have a simpler name than ComputeVectorAddresses?
The same general comment around naming applies here too.
This issue has been marked api-ready-for-review.
Done. Just spotted there are multiple variants here. The expected one: return a vector in which element i contains bases[i] + offsets[i].
And three variants with a shift: return a vector in which element i contains bases[i] + indices[i] * N, for N = 2, 4 and 8.
Call these variants ComputeVectorAddresses16, ComputeVectorAddresses32 and ComputeVectorAddresses64?
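A scalar reference for those semantics (a sketch only; the scale factor n is 1, 2, 4 or 8 for the byte/16-bit/32-bit/64-bit variants):

// Element-wise behaviour of the ADR-based address computation.
static ulong[] ComputeAddressesReference(ulong[] bases, long[] indices, int n)
{
    var result = new ulong[bases.Length];
    for (int i = 0; i < bases.Length; i++)
        result[i] = bases[i] + (ulong)(indices[i] * n); // bases[i] + indices[i] * N
    return result;
}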
Done.
I've expanded these out. I think I may have gone too far? Can some of these be trimmed back?
Done.
I think they are all pretty much the same, which is "baseAddr + index". While we could expose all possible permutations here, I think it'd be reasonable to scope it back.
Yes, because while we can't overload by return type, we can overload by parameters, so the signed- and unsigned-index forms can share one name.
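Concretely (signature shapes copied from this proposal; illustrative only):

// Return-type-only overloads are illegal in C#, but these can coexist because
// the index parameter types differ:
public static unsafe Vector<uint> Compute16BitAddresses(Vector<uint> bases, Vector<int> indices);  // signed indices
public static unsafe Vector<uint> Compute16BitAddresses(Vector<uint> bases, Vector<uint> indices); // unsigned indices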
We probably want the Compute8BitAddresses/Compute16BitAddresses/Compute32BitAddresses/Compute64BitAddresses naming.
All done.

Maybe the following:

public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(const short *base);
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(const short *base);
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(const ushort *base);
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(const ushort *base);

should be:

public static unsafe Vector<long> LoadVectorInt16ExtendToInt64(const short *base);
public static unsafe Vector<ulong> LoadVectorInt16ExtendToUInt64(const short *base);
public static unsafe Vector<long> LoadVectorUInt16ExtendToInt64(const ushort *base);
public static unsafe Vector<ulong> LoadVectorUInt16ExtendToUInt64(const ushort *base);

?
I have a slight preference towards the former; it makes clear the type of extension it's doing. I've seen a lot of people get confused and think it zero- or sign-extends based on the destination type, when in practice it's the input type that determines which is done.
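A scalar illustration of that point (plain C#, nothing SVE-specific):

// The source type, not the destination type, decides the extension:
short s = -1;           // bits 0xFFFF, signed source
ushort u = 0xFFFF;      // same bits, unsigned source
long fromSigned = s;    // sign-extended: -1
long fromUnsigned = u;  // zero-extended: 65535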
I don't think so. We don't have a naming precedent for that elsewhere.
namespace System.Runtime.Intrinsics.Arm;
/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: loads
{
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector(T* address); // LD1W or LD1D or LD1B or LD1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector128AndReplicateToVector(Vector<T> mask, T* address); // LD1RQW or LD1RQD or LD1RQB or LD1RQH
public static unsafe Vector<short> LoadVectorSByteNonFaultingSignExtendToInt16(Vector<short> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<int> LoadVectorSByteNonFaultingSignExtendToInt32(Vector<int> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<long> LoadVectorSByteNonFaultingSignExtendToInt64(Vector<long> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<ushort> LoadVectorSByteNonFaultingSignExtendToUInt16(Vector<ushort> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<uint> LoadVectorSByteNonFaultingSignExtendToUInt32(Vector<uint> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<ulong> LoadVectorSByteNonFaultingSignExtendToUInt64(Vector<ulong> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<short> LoadVectorSByteSignExtendToInt16(sbyte* address); // LD1SB
public static unsafe Vector<int> LoadVectorSByteSignExtendToInt32(sbyte* address); // LD1SB
public static unsafe Vector<long> LoadVectorSByteSignExtendToInt64(sbyte* address); // LD1SB
public static unsafe Vector<ushort> LoadVectorSByteSignExtendToUInt16(sbyte* address); // LD1SB
public static unsafe Vector<uint> LoadVectorSByteSignExtendToUInt32(sbyte* address); // LD1SB
public static unsafe Vector<ulong> LoadVectorSByteSignExtendToUInt64(sbyte* address); // LD1SB
public static unsafe Vector<short> LoadVectorByteNonFaultingZeroExtendToInt16(Vector<short> mask, byte* address); // LDNF1B
public static unsafe Vector<int> LoadVectorByteNonFaultingZeroExtendToInt32(Vector<int> mask, byte* address); // LDNF1B
public static unsafe Vector<long> LoadVectorByteNonFaultingZeroExtendToInt64(Vector<long> mask, byte* address); // LDNF1B
public static unsafe Vector<ushort> LoadVectorByteNonFaultingZeroExtendToUInt16(Vector<ushort> mask, byte* address); // LDNF1B
public static unsafe Vector<uint> LoadVectorByteNonFaultingZeroExtendToUInt32(Vector<uint> mask, byte* address); // LDNF1B
public static unsafe Vector<ulong> LoadVectorByteNonFaultingZeroExtendToUInt64(Vector<ulong> mask, byte* address); // LDNF1B
public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(byte* address); // LD1B
public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(byte* address); // LD1B
public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(byte* address); // LD1B
public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(byte* address); // LD1B
public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(byte* address); // LD1B
public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(byte* address); // LD1B
public static unsafe Vector<int> LoadVectorInt16NonFaultingSignExtendToInt32(Vector<int> mask, short* address); // LDNF1SH
public static unsafe Vector<long> LoadVectorInt16NonFaultingSignExtendToInt64(Vector<long> mask, short* address); // LDNF1SH
public static unsafe Vector<uint> LoadVectorInt16NonFaultingSignExtendToUInt32(Vector<uint> mask, short* address); // LDNF1SH
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingSignExtendToUInt64(Vector<ulong> mask, short* address); // LDNF1SH
public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(short* address); // LD1SH
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(short* address); // LD1SH
public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(short* address); // LD1SH
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(short* address); // LD1SH
public static unsafe Vector<int> LoadVectorInt16NonFaultingZeroExtendToInt32(Vector<int> mask, ushort* address); // LDNF1H
public static unsafe Vector<long> LoadVectorInt16NonFaultingZeroExtendToInt64(Vector<long> mask, ushort* address); // LDNF1H
public static unsafe Vector<uint> LoadVectorInt16NonFaultingZeroExtendToUInt32(Vector<uint> mask, ushort* address); // LDNF1H
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingZeroExtendToUInt64(Vector<ulong> mask, ushort* address); // LDNF1H
public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(ushort* address); // LD1H
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(ushort* address); // LD1H
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(ushort* address); // LD1H
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(ushort* address); // LD1H
public static unsafe Vector<long> LoadVectorInt32NonFaultingSignExtendToInt64(Vector<long> mask, int* address); // LDNF1SW
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingSignExtendToUInt64(Vector<ulong> mask, int* address); // LDNF1SW
public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(int* address); // LD1SW
public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(int* address); // LD1SW
public static unsafe Vector<long> LoadVectorInt32NonFaultingZeroExtendToInt64(Vector<long> mask, uint* address); // LDNF1W
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingZeroExtendToUInt64(Vector<ulong> mask, uint* address); // LDNF1W
public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(uint* address); // LD1W
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(uint* address); // LD1W
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(Vector<T> mask, T* address); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonTemporal(T* address); // LDNT1W or LDNT1D or LDNT1B or LDNT1H
}
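As a feel for how the widening loads above would read at a call site (a hypothetical sketch against the proposed surface; WidenBytes is an invented helper, and tail handling is omitted):

// Widen a byte buffer to ints one vector at a time; LD1B performs the
// load and zero-extend in a single instruction.
unsafe void WidenBytes(byte* src, int* dst, int length)
{
    int step = Vector<int>.Count;
    for (int i = 0; i + step <= length; i += step)
    {
        Vector<int> widened = Sve.LoadVectorByteZeroExtendToInt32(src + i);
        widened.CopyTo(new Span<int>(dst + i, step));
    }
}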
These 4 should be LoadVectorSByteSignExtend... rather than LoadVectorByteSignExtend..., since they load from sbyte.
Not sure why I missed inserting four esses there, but fixed it in the approval post.
We didn't quite get NonFaulting correct in the review. It is better understood when taken into account with first-faulting. For all three instructions the inactive elements behave the same; the difference is in the active elements. If the address of an active element is invalid then, when loading it: a plain load (LD1) faults; a first-faulting load (LDFF1) faults only if it is the first active element, otherwise it clears the FFR bit for that element and the ones after it; a non-faulting load (LDNF1) never faults, and clears the FFR bits from the failing element onwards.
This can be seen in the pseudo code for the instructions: accesses that are allowed to fail use the MemNF function, which returns a fault flag instead of trapping. All three instructions have the same latency and throughput. Thoughts for the API are in the follow-on comment below; a scalar sketch of the rules follows here.
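Illustration only: FFR is modelled as a bool array initialised to all-true, and tryRead is a hypothetical stand-in for a MemNF-style access that reports failure instead of trapping.

// LDNF1-style behaviour: never traps; from the first failing active element
// onwards the FFR bits are cleared and the results are left as zero.
static int[] LoadNonFaulting(bool[] mask, Func<int, (bool ok, int value)> tryRead, bool[] ffr)
{
    var result = new int[mask.Length];
    bool failed = false;
    for (int i = 0; i < mask.Length; i++)
    {
        if (failed) { ffr[i] = false; continue; } // results unknown after a failure
        if (!mask[i]) continue;                   // inactive elements never fault
        var (ok, value) = tryRead(i);
        if (ok) result[i] = value;
        else { failed = true; ffr[i] = false; }   // record the failure, no trap
    }
    return result;
}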
When discussing the gather loads fragment we decided that the first parameter should be named addresses rather than bases:
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
Follow-on comment: the NonFaulting instructions should remove the mask parameter, and it should be added to the ones that do fault.
Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics

Issue Details

namespace System.Runtime.Intrinsics.Arm
/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: loads
{
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeByteAddresses(Vector<T> bases, Vector<T2> offsets); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeByteAddresses(Vector<T> bases, Vector<T> offsets); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeInt16Addresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeInt16Addresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeInt32Addresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeInt32Addresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> ComputeInt64Addresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> ComputeInt64Addresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector(Vector<T> mask, const T *base); // LD1W or LD1D or LD1B or LD1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector128AndReplicateToVector(Vector<T> mask, const T *base); // LD1RQW or LD1RQD or LD1RQB or LD1RQH
public static unsafe Vector<short> LoadVectorByteSignExtendNonFaultingToInt16(Vector<short> mask, const sbyte *base); // LDNF1SB
public static unsafe Vector<int> LoadVectorByteSignExtendNonFaultingToInt32(Vector<int> mask, const sbyte *base); // LDNF1SB
public static unsafe Vector<long> LoadVectorByteSignExtendNonFaultingToInt64(Vector<long> mask, const sbyte *base); // LDNF1SB
public static unsafe Vector<ushort> LoadVectorByteSignExtendNonFaultingToUInt16(Vector<ushort> mask, const sbyte *base); // LDNF1SB
public static unsafe Vector<uint> LoadVectorByteSignExtendNonFaultingToUInt32(Vector<uint> mask, const sbyte *base); // LDNF1SB
public static unsafe Vector<ulong> LoadVectorByteSignExtendNonFaultingToUInt64(Vector<ulong> mask, const sbyte *base); // LDNF1SB
public static unsafe Vector<short> LoadVectorByteSignExtendToInt16(Vector<short> mask, const sbyte *base); // LD1SB
public static unsafe Vector<int> LoadVectorByteSignExtendToInt32(Vector<int> mask, const sbyte *base); // LD1SB
public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(Vector<long> mask, const sbyte *base); // LD1SB
public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(Vector<ushort> mask, const sbyte *base); // LD1SB
public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(Vector<uint> mask, const sbyte *base); // LD1SB
public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(Vector<ulong> mask, const sbyte *base); // LD1SB
public static unsafe Vector<short> LoadVectorByteZeroExtendNonFaultingToInt16(Vector<short> mask, const byte *base); // LDNF1B
public static unsafe Vector<int> LoadVectorByteZeroExtendNonFaultingToInt32(Vector<int> mask, const byte *base); // LDNF1B
public static unsafe Vector<long> LoadVectorByteZeroExtendNonFaultingToInt64(Vector<long> mask, const byte *base); // LDNF1B
public static unsafe Vector<ushort> LoadVectorByteZeroExtendNonFaultingToUInt16(Vector<ushort> mask, const byte *base); // LDNF1B
public static unsafe Vector<uint> LoadVectorByteZeroExtendNonFaultingToUInt32(Vector<uint> mask, const byte *base); // LDNF1B
public static unsafe Vector<ulong> LoadVectorByteZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const byte *base); // LDNF1B
public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(Vector<short> mask, const byte *base); // LD1B
public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(Vector<int> mask, const byte *base); // LD1B
public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(Vector<long> mask, const byte *base); // LD1B
public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(Vector<ushort> mask, const byte *base); // LD1B
public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(Vector<uint> mask, const byte *base); // LD1B
public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(Vector<ulong> mask, const byte *base); // LD1B
public static unsafe Vector<int> LoadVectorInt16SignExtendNonFaultingToInt32(Vector<int> mask, const short *base); // LDNF1SH
public static unsafe Vector<long> LoadVectorInt16SignExtendNonFaultingToInt64(Vector<long> mask, const short *base); // LDNF1SH
public static unsafe Vector<uint> LoadVectorInt16SignExtendNonFaultingToUInt32(Vector<uint> mask, const short *base); // LDNF1SH
public static unsafe Vector<ulong> LoadVectorInt16SignExtendNonFaultingToUInt64(Vector<ulong> mask, const short *base); // LDNF1SH
public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(Vector<int> mask, const short *base); // LD1SH
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(Vector<long> mask, const short *base); // LD1SH
public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(Vector<uint> mask, const short *base); // LD1SH
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(Vector<ulong> mask, const short *base); // LD1SH
public static unsafe Vector<int> LoadVectorInt16ZeroExtendNonFaultingToInt32(Vector<int> mask, const ushort *base); // LDNF1H
public static unsafe Vector<long> LoadVectorInt16ZeroExtendNonFaultingToInt64(Vector<long> mask, const ushort *base); // LDNF1H
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendNonFaultingToUInt32(Vector<uint> mask, const ushort *base); // LDNF1H
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const ushort *base); // LDNF1H
public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(Vector<int> mask, const ushort *base); // LD1H
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(Vector<long> mask, const ushort *base); // LD1H
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(Vector<uint> mask, const ushort *base); // LD1H
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(Vector<ulong> mask, const ushort *base); // LD1H
public static unsafe Vector<long> LoadVectorInt32SignExtendNonFaultingToInt64(Vector<long> mask, const int *base); // LDNF1SW
public static unsafe Vector<ulong> LoadVectorInt32SignExtendNonFaultingToUInt64(Vector<ulong> mask, const int *base); // LDNF1SW
public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(Vector<long> mask, const int *base); // LD1SW
public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(Vector<ulong> mask, const int *base); // LD1SW
public static unsafe Vector<long> LoadVectorInt32ZeroExtendNonFaultingToInt64(Vector<long> mask, const uint *base); // LDNF1W
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const uint *base); // LDNF1W
public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(Vector<long> mask, const uint *base); // LD1W
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(Vector<ulong> mask, const uint *base); // LD1W
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(Vector<T> mask, const T *base); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonTemporal(Vector<T> mask, const T *base); // LDNT1W or LDNT1D or LDNT1B or LDNT1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>) LoadVectorx2(Vector<T> mask, const T *base); // LD2W or LD2D or LD2B or LD2H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>) LoadVectorx3(Vector<T> mask, const T *base); // LD3W or LD3D or LD3B or LD3H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>, Vector<T>) LoadVectorx4(Vector<T> mask, const T *base); // LD4W or LD4D or LD4B or LD4H
public static unsafe void PrefetchBytes(Vector<byte> mask, const void *base, enum SvePrefetchType op); // PRFB
public static unsafe void PrefetchInt16(Vector<ushort> mask, const void *base, enum SvePrefetchType op); // PRFH
public static unsafe void PrefetchInt32(Vector<uint> mask, const void *base, enum SvePrefetchType op); // PRFW
public static unsafe void PrefetchInt64(Vector<ulong> mask, const void *base, enum SvePrefetchType op); // PRFD
// All possible prefetch types
enum SvePrefetchType
{
SV_PLDL1KEEP = 0, // Temporal fetch the addressed location for reading, to L1 cache.
SV_PLDL1STRM = 1, // Streaming fetch the addressed location for reading, to L1 cache.
SV_PLDL2KEEP = 2, // Temporal fetch the addressed location for reading, to L2 cache.
SV_PLDL2STRM = 3, // Streaming fetch the addressed location for reading, to L2 cache.
SV_PLDL3KEEP = 4, // Temporal fetch the addressed location for reading, to L3 cache.
SV_PLDL3STRM = 5, // Streaming fetch the addressed location for reading, to L3 cache.
SV_PSTL1KEEP = 8, // Temporal fetch the addressed location for writing, to L1 cache.
SV_PSTL1STRM = 9, // Streaming fetch the addressed location for writing, to L1 cache.
SV_PSTL2KEEP = 10, // Temporal fetch the addressed location for writing, to L2 cache.
SV_PSTL2STRM = 11, // Streaming fetch the addressed location for writing, to L2 cache.
SV_PSTL3KEEP = 12, // Temporal fetch the addressed location for writing, to L3 cache.
SV_PSTL3STRM = 13 // Streaming fetch the addressed location for writing, to L3 cache.
};
/// total method signatures: 67
/// Optional Entries:
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector(Vector<T> mask, const T *base, long vnum); // LD1W or LD1D or LD1B or LD1H
public static unsafe Vector<long> LoadVector(Vector<long> mask, const long *base, long vnum); // LD1D
public static unsafe Vector<short> LoadVectorByteSignExtendNonFaultingToInt16(Vector<short> mask, const sbyte *base, long vnum); // LDNF1SB
public static unsafe Vector<int> LoadVectorByteSignExtendNonFaultingToInt32(Vector<int> mask, const sbyte *base, long vnum); // LDNF1SB
public static unsafe Vector<long> LoadVectorByteSignExtendNonFaultingToInt64(Vector<long> mask, const sbyte *base, long vnum); // LDNF1SB
public static unsafe Vector<ushort> LoadVectorByteSignExtendNonFaultingToUInt16(Vector<ushort> mask, const sbyte *base, long vnum); // LDNF1SB
public static unsafe Vector<uint> LoadVectorByteSignExtendNonFaultingToUInt32(Vector<uint> mask, const sbyte *base, long vnum); // LDNF1SB
public static unsafe Vector<ulong> LoadVectorByteSignExtendNonFaultingToUInt64(Vector<ulong> mask, const sbyte *base, long vnum); // LDNF1SB
public static unsafe Vector<short> LoadVectorByteSignExtendToInt16(Vector<short> mask, const sbyte *base, long vnum); // LD1SB
public static unsafe Vector<int> LoadVectorByteSignExtendToInt32(Vector<int> mask, const sbyte *base, long vnum); // LD1SB
public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(Vector<long> mask, const sbyte *base, long vnum); // LD1SB
public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(Vector<ushort> mask, const sbyte *base, long vnum); // LD1SB
public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(Vector<uint> mask, const sbyte *base, long vnum); // LD1SB
public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(Vector<ulong> mask, const sbyte *base, long vnum); // LD1SB
public static unsafe Vector<short> LoadVectorByteZeroExtendNonFaultingToInt16(Vector<short> mask, const byte *base, long vnum); // LDNF1B
public static unsafe Vector<int> LoadVectorByteZeroExtendNonFaultingToInt32(Vector<int> mask, const byte *base, long vnum); // LDNF1B
public static unsafe Vector<long> LoadVectorByteZeroExtendNonFaultingToInt64(Vector<long> mask, const byte *base, long vnum); // LDNF1B
public static unsafe Vector<ushort> LoadVectorByteZeroExtendNonFaultingToUInt16(Vector<ushort> mask, const byte *base, long vnum); // LDNF1B
public static unsafe Vector<uint> LoadVectorByteZeroExtendNonFaultingToUInt32(Vector<uint> mask, const byte *base, long vnum); // LDNF1B
public static unsafe Vector<ulong> LoadVectorByteZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const byte *base, long vnum); // LDNF1B
public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(Vector<short> mask, const byte *base, long vnum); // LD1B
public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(Vector<int> mask, const byte *base, long vnum); // LD1B
public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(Vector<long> mask, const byte *base, long vnum); // LD1B
public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(Vector<ushort> mask, const byte *base, long vnum); // LD1B
public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(Vector<uint> mask, const byte *base, long vnum); // LD1B
public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(Vector<ulong> mask, const byte *base, long vnum); // LD1B
public static unsafe Vector<int> LoadVectorInt16SignExtendNonFaultingToInt32(Vector<int> mask, const short *base, long vnum); // LDNF1SH
public static unsafe Vector<long> LoadVectorInt16SignExtendNonFaultingToInt64(Vector<long> mask, const short *base, long vnum); // LDNF1SH
public static unsafe Vector<uint> LoadVectorInt16SignExtendNonFaultingToUInt32(Vector<uint> mask, const short *base, long vnum); // LDNF1SH
public static unsafe Vector<ulong> LoadVectorInt16SignExtendNonFaultingToUInt64(Vector<ulong> mask, const short *base, long vnum); // LDNF1SH
public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(Vector<int> mask, const short *base, long vnum); // LD1SH
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(Vector<long> mask, const short *base, long vnum); // LD1SH
public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(Vector<uint> mask, const short *base, long vnum); // LD1SH
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(Vector<ulong> mask, const short *base, long vnum); // LD1SH
public static unsafe Vector<int> LoadVectorInt16ZeroExtendNonFaultingToInt32(Vector<int> mask, const ushort *base, long vnum); // LDNF1H
public static unsafe Vector<long> LoadVectorInt16ZeroExtendNonFaultingToInt64(Vector<long> mask, const ushort *base, long vnum); // LDNF1H
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendNonFaultingToUInt32(Vector<uint> mask, const ushort *base, long vnum); // LDNF1H
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const ushort *base, long vnum); // LDNF1H
public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(Vector<int> mask, const ushort *base, long vnum); // LD1H
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(Vector<long> mask, const ushort *base, long vnum); // LD1H
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(Vector<uint> mask, const ushort *base, long vnum); // LD1H
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(Vector<ulong> mask, const ushort *base, long vnum); // LD1H
public static unsafe Vector<long> LoadVectorInt32SignExtendNonFaultingToInt64(Vector<long> mask, const int *base, long vnum); // LDNF1SW
public static unsafe Vector<ulong> LoadVectorInt32SignExtendNonFaultingToUInt64(Vector<ulong> mask, const int *base, long vnum); // LDNF1SW
public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(Vector<long> mask, const int *base, long vnum); // LD1SW
public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(Vector<ulong> mask, const int *base, long vnum); // LD1SW
public static unsafe Vector<long> LoadVectorInt32ZeroExtendNonFaultingToInt64(Vector<long> mask, const uint *base, long vnum); // LDNF1W
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const uint *base, long vnum); // LDNF1W
public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(Vector<long> mask, const uint *base, long vnum); // LD1W
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(Vector<ulong> mask, const uint *base, long vnum); // LD1W
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(Vector<T> mask, const T *base, long vnum); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, const long *base, long vnum); // LDNF1D
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonTemporal(Vector<T> mask, const T *base, long vnum); // LDNT1W or LDNT1D or LDNT1B or LDNT1H
public static unsafe Vector<long> LoadVectorNonTemporal(Vector<long> mask, const long *base, long vnum); // LDNT1D
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>) LoadVectorx2(Vector<T> mask, const T *base, long vnum); // LD2W or LD2D or LD2B or LD2H
public static unsafe (Vector<long>, Vector<long>) LoadVectorx2(Vector<long> mask, const long *base, long vnum); // LD2D
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>) LoadVectorx3(Vector<T> mask, const T *base, long vnum); // LD3W or LD3D or LD3B or LD3H
public static unsafe (Vector<long>, Vector<long>, Vector<long>) LoadVectorx3(Vector<long> mask, const long *base, long vnum); // LD3D
/// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
public static unsafe (Vector<T>, Vector<T>, Vector<T>, Vector<T>) LoadVectorx4(Vector<T> mask, const T *base, long vnum); // LD4W or LD4D or LD4B or LD4H
public static unsafe (Vector<long>, Vector<long>, Vector<long>, Vector<long>) LoadVectorx4(Vector<long> mask, const long *base, long vnum); // LD4D
public static unsafe void PrefetchBytes(Vector<byte> mask, const void *base, long vnum, enum SvePrefetchType op); // PRFB
public static unsafe void PrefetchInt16(Vector<ushort> mask, const void *base, long vnum, enum SvePrefetchType op); // PRFH
public static unsafe void PrefetchInt32(Vector<uint> mask, const void *base, long vnum, enum SvePrefetchType op); // PRFW
public static unsafe void PrefetchInt64(Vector<ulong> mask, const void *base, long vnum, enum SvePrefetchType op); // PRFD
/// total optional method signatures: 64
}
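For reference, a hypothetical call site for the prefetch entries above (mask, src and next are assumed locals):

// Warm the next chunk into L1 for reading while the current one is processed.
Sve.PrefetchBytes(mask, src + next, SvePrefetchType.SV_PLDL1KEEP); // PRFB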
We discussed this during the API review. As I recall, we decided this was the correct behaviour for the API. Just wanted to confirm, as we'll be looking to implement many of these soon.
It should still be correct. In general we want to rely on ConditionalSelect, with the JIT folding the mask into the load where possible.
Agreed. We have a few of those (namely everything not named NonFaulting or FirstFaulting). For example, for LD1H the architecture says "Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector." We currently have that as an unmasked signature; it needs extending to take a mask parameter.
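A scalar sketch of that zero-fill behaviour and its ConditionalSelect equivalence (illustrative only):

// Masked load with zero-filled inactive elements, element by element:
static int[] MaskedLoad(bool[] mask, int[] memory)
{
    var result = new int[mask.Length];
    for (int i = 0; i < mask.Length; i++)
        result[i] = mask[i] ? memory[i] : 0; // inactive elements read as zero
    return result;
}
// This is ConditionalSelect(mask, load, zeros), which is why a ConditionalSelect
// wrapped around an unmasked load can be folded into a predicated load.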