Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Sve.TransposeEven/Odd() #103068

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1855,6 +1855,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
break;

case NI_Sve_TransposeEven:
case NI_Sve_TransposeOdd:
case NI_Sve_UnzipEven:
case NI_Sve_UnzipOdd:
case NI_Sve_ZipHigh:
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ HARDWARE_INTRINSIC(Sve, StoreNarrowing,
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, true, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, true, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, Xor, -1, -1, false, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4254,6 +4254,132 @@ internal Arm64() { }
public static unsafe Vector<long> SignExtendWideningUpper(Vector<int> value) { throw new PlatformNotSupportedException(); }


/// Interleave even elements from two inputs

/// <summary>
/// svuint8_t svtrn1[_u8](svuint8_t op1, svuint8_t op2)
/// TRN1 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> TransposeEven(Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svtrn1[_f64](svfloat64_t op1, svfloat64_t op2)
/// TRN1 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> TransposeEven(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svtrn1[_s16](svint16_t op1, svint16_t op2)
/// TRN1 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> TransposeEven(Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svtrn1[_s32](svint32_t op1, svint32_t op2)
/// TRN1 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> TransposeEven(Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svtrn1[_s64](svint64_t op1, svint64_t op2)
/// TRN1 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> TransposeEven(Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svtrn1[_s8](svint8_t op1, svint8_t op2)
/// TRN1 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> TransposeEven(Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svtrn1[_f32](svfloat32_t op1, svfloat32_t op2)
/// TRN1 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> TransposeEven(Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svtrn1[_u16](svuint16_t op1, svuint16_t op2)
/// TRN1 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> TransposeEven(Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svtrn1[_u32](svuint32_t op1, svuint32_t op2)
/// TRN1 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> TransposeEven(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svtrn1[_u64](svuint64_t op1, svuint64_t op2)
/// TRN1 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> TransposeEven(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// Interleave odd elements from two inputs

/// <summary>
/// svuint8_t svtrn2[_u8](svuint8_t op1, svuint8_t op2)
/// TRN2 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> TransposeOdd(Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svtrn2[_f64](svfloat64_t op1, svfloat64_t op2)
/// TRN2 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> TransposeOdd(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svtrn2[_s16](svint16_t op1, svint16_t op2)
/// TRN2 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> TransposeOdd(Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svtrn2[_s32](svint32_t op1, svint32_t op2)
/// TRN2 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> TransposeOdd(Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svtrn2[_s64](svint64_t op1, svint64_t op2)
/// TRN2 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> TransposeOdd(Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svtrn2[_s8](svint8_t op1, svint8_t op2)
/// TRN2 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> TransposeOdd(Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svtrn2[_f32](svfloat32_t op1, svfloat32_t op2)
/// TRN2 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> TransposeOdd(Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svtrn2[_u16](svuint16_t op1, svuint16_t op2)
/// TRN2 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> TransposeOdd(Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svtrn2[_u32](svuint32_t op1, svuint32_t op2)
/// TRN2 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> TransposeOdd(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svtrn2[_u64](svuint64_t op1, svuint64_t op2)
/// TRN2 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> TransposeOdd(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// UnzipEven : Concatenate even elements from two inputs

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4298,6 +4298,132 @@ internal Arm64() { }
public static unsafe Vector<ulong> SubtractSaturate(Vector<ulong> left, Vector<ulong> right) => SubtractSaturate(left, right);


/// Interleave even elements from two inputs

/// <summary>
/// svuint8_t svtrn1[_u8](svuint8_t op1, svuint8_t op2)
/// TRN1 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> TransposeEven(Vector<byte> left, Vector<byte> right) => TransposeEven(left, right);

/// <summary>
/// svfloat64_t svtrn1[_f64](svfloat64_t op1, svfloat64_t op2)
/// TRN1 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> TransposeEven(Vector<double> left, Vector<double> right) => TransposeEven(left, right);

/// <summary>
/// svint16_t svtrn1[_s16](svint16_t op1, svint16_t op2)
/// TRN1 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> TransposeEven(Vector<short> left, Vector<short> right) => TransposeEven(left, right);

/// <summary>
/// svint32_t svtrn1[_s32](svint32_t op1, svint32_t op2)
/// TRN1 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> TransposeEven(Vector<int> left, Vector<int> right) => TransposeEven(left, right);

/// <summary>
/// svint64_t svtrn1[_s64](svint64_t op1, svint64_t op2)
/// TRN1 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> TransposeEven(Vector<long> left, Vector<long> right) => TransposeEven(left, right);

/// <summary>
/// svint8_t svtrn1[_s8](svint8_t op1, svint8_t op2)
/// TRN1 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> TransposeEven(Vector<sbyte> left, Vector<sbyte> right) => TransposeEven(left, right);

/// <summary>
/// svfloat32_t svtrn1[_f32](svfloat32_t op1, svfloat32_t op2)
/// TRN1 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> TransposeEven(Vector<float> left, Vector<float> right) => TransposeEven(left, right);

/// <summary>
/// svuint16_t svtrn1[_u16](svuint16_t op1, svuint16_t op2)
/// TRN1 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> TransposeEven(Vector<ushort> left, Vector<ushort> right) => TransposeEven(left, right);

/// <summary>
/// svuint32_t svtrn1[_u32](svuint32_t op1, svuint32_t op2)
/// TRN1 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> TransposeEven(Vector<uint> left, Vector<uint> right) => TransposeEven(left, right);

/// <summary>
/// svuint64_t svtrn1[_u64](svuint64_t op1, svuint64_t op2)
/// TRN1 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> TransposeEven(Vector<ulong> left, Vector<ulong> right) => TransposeEven(left, right);


/// Interleave odd elements from two inputs

/// <summary>
/// svuint8_t svtrn2[_u8](svuint8_t op1, svuint8_t op2)
/// TRN2 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> TransposeOdd(Vector<byte> left, Vector<byte> right) => TransposeOdd(left, right);

/// <summary>
/// svfloat64_t svtrn2[_f64](svfloat64_t op1, svfloat64_t op2)
/// TRN2 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> TransposeOdd(Vector<double> left, Vector<double> right) => TransposeOdd(left, right);

/// <summary>
/// svint16_t svtrn2[_s16](svint16_t op1, svint16_t op2)
/// TRN2 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> TransposeOdd(Vector<short> left, Vector<short> right) => TransposeOdd(left, right);

/// <summary>
/// svint32_t svtrn2[_s32](svint32_t op1, svint32_t op2)
/// TRN2 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> TransposeOdd(Vector<int> left, Vector<int> right) => TransposeOdd(left, right);

/// <summary>
/// svint64_t svtrn2[_s64](svint64_t op1, svint64_t op2)
/// TRN2 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> TransposeOdd(Vector<long> left, Vector<long> right) => TransposeOdd(left, right);

/// <summary>
/// svint8_t svtrn2[_s8](svint8_t op1, svint8_t op2)
/// TRN2 Zresult.B, Zop1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> TransposeOdd(Vector<sbyte> left, Vector<sbyte> right) => TransposeOdd(left, right);

/// <summary>
/// svfloat32_t svtrn2[_f32](svfloat32_t op1, svfloat32_t op2)
/// TRN2 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> TransposeOdd(Vector<float> left, Vector<float> right) => TransposeOdd(left, right);

/// <summary>
/// svuint16_t svtrn2[_u16](svuint16_t op1, svuint16_t op2)
/// TRN2 Zresult.H, Zop1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> TransposeOdd(Vector<ushort> left, Vector<ushort> right) => TransposeOdd(left, right);

/// <summary>
/// svuint32_t svtrn2[_u32](svuint32_t op1, svuint32_t op2)
/// TRN2 Zresult.S, Zop1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> TransposeOdd(Vector<uint> left, Vector<uint> right) => TransposeOdd(left, right);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for this PR, but wonder for these type of APIs that has predicates version, e.g. TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>, what happens when we do something like TransposeOdd(CreateTrueMask(), CreateTrueMask())?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's interesting. As the masks are treated as regular vectors, it would act as using Vectors of 1s and 0s. I wonder if that's good enough for us while writing C# code. If someone want to operate on masks, they can just use the vector version and then use the result as a mask for the next instructions 🤔 .

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@a74nh - any thoughts on this? There are many APIs that fall in this category.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the code as it is, it's going to be converting to vectors, using the vector version, then converting back to mask. Which isn't ideal.

should be fairly easy to add some checks. If all inputs are all masks converted to vectors, then remove the convert to vectors. Then in codegen, if inputs are masks then use the mask versions.

We probably want an issue for this to track it. Then enable one by one.

Probably best to do this after implementing all the APIs so that we get all functionality done first.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


/// <summary>
/// svuint64_t svtrn2[_u64](svuint64_t op1, svuint64_t op2)
/// TRN2 Zresult.D, Zop1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> TransposeOdd(Vector<ulong> left, Vector<ulong> right) => TransposeOdd(left, right);


/// UnzipEven : Concatenate even elements from two inputs

/// <summary>
Expand Down
Loading
Loading