diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 048d6eced96d0..4e0b51f0026d4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4554,6 +4554,11 @@ class Compiler NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immLowerBound, int immUpperBound); GenTree* addRangeCheckForHWIntrinsic(GenTree* immOp, int immLowerBound, int immUpperBound); +#if defined(TARGET_ARM64) + GenTree* convertHWIntrinsicToMask(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type); +#endif + #endif // FEATURE_HW_INTRINSICS GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index af688a3678fed..694ef50a6a3df 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -7303,6 +7303,34 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) } } +// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement. +// Asserts and returns INS_OPTS_NONE if an invalid 'elemsize' is passed +// +/*static*/ insOpts emitter::optGetSveInsOpt(emitAttr elemsize) +{ + switch (elemsize) + { + case EA_1BYTE: + return INS_OPTS_SCALABLE_B; + + case EA_2BYTE: + return INS_OPTS_SCALABLE_H; + + case EA_4BYTE: + return INS_OPTS_SCALABLE_S; + + case EA_8BYTE: + return INS_OPTS_SCALABLE_D; + + case EA_16BYTE: + return INS_OPTS_SCALABLE_Q; + + default: + assert(!"Invalid emitAttr for sve vector register"); + return INS_OPTS_NONE; + } +} + // For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement // asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed // @@ -13020,6 +13048,12 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_HP_3A; break; + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ld1w: + case INS_sve_ld1d: + return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt); + default: unreached(); break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d17ccd483885d..0ef72d8b750c0 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -1138,6 +1138,9 @@ static emitAttr optGetDatasize(insOpts arrangement); // For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement static emitAttr optGetElemsize(insOpts arrangement); +// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement. +static insOpts optGetSveInsOpt(emitAttr elemsize); + // For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement static emitAttr optGetSveElemsize(insOpts arrangement); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 505ad72fd6946..458117f8ebfb6 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26067,9 +26067,12 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4: - addr = Op(3); break; + + case NI_Sve_LoadVector: + addr = Op(2); + break; #endif // TARGET_ARM64 default: diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index f771a9ec978e2..14c262524da2d 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1356,6 +1356,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, compFloatingPointUsed = true; } + var_types nodeRetType = retType; +#if defined(TARGET_ARM64) + if (HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)) + { + // Ensure the result is generated to a mask. + nodeRetType = TYP_MASK; + } +#endif // defined(TARGET_ARM64) + // table-driven importer of simple intrinsics if (impIsTableDrivenHWIntrinsic(intrinsic, category)) { @@ -1392,7 +1401,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case 0: { assert(!isScalar); - retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(nodeRetType, intrinsic, simdBaseJitType, simdSize); break; } @@ -1410,8 +1419,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } - retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); + retNode = isScalar ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, intrinsic) + : gtNewSimdHWIntrinsicNode(nodeRetType, op1, intrinsic, simdBaseJitType, simdSize); #if defined(TARGET_XARCH) switch (intrinsic) @@ -1462,8 +1471,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); - retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + retNode = isScalar + ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, intrinsic) + : gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize); #ifdef TARGET_XARCH if ((intrinsic == NI_SSE42_Crc32) || (intrinsic == NI_SSE42_X64_Crc32)) @@ -1543,9 +1553,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand, immLowerBound, immUpperBound); } - retNode = isScalar - ? gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + retNode = isScalar ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic) + : gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType, + simdSize); #ifdef TARGET_XARCH if ((intrinsic == NI_AVX2_GatherVector128) || (intrinsic == NI_AVX2_GatherVector256)) @@ -1566,7 +1576,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); assert(!isScalar); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); + retNode = + gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); break; } @@ -1576,8 +1587,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } else { - retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, retType, simdSize); + retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, nodeRetType, simdSize); + } + +#if defined(TARGET_ARM64) + if (HWIntrinsicInfo::IsMaskedOperation(intrinsic)) + { + // Op1 input is a vector. HWInstrinsic requires a mask, so convert to a mask. + assert(numArgs > 0); + GenTree* op1 = retNode->AsHWIntrinsic()->Op(1); + op1 = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(1) = op1; + } + + if (retType != nodeRetType) + { + // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. + assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); + retNode = convertHWIntrinsicFromMask(retNode->AsHWIntrinsic(), retType); } +#endif // defined(TARGET_ARM64) if ((retNode != nullptr) && retNode->OperIs(GT_HWINTRINSIC)) { diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 15256ea22e93b..cac041eb83ea6 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -58,6 +58,7 @@ enum HWIntrinsicCategory : uint8_t HW_Category_ShiftLeftByImmediate, HW_Category_ShiftRightByImmediate, HW_Category_SIMDByIndexedElement, + HW_Category_EnumPattern, // Helper intrinsics // - do not directly correspond to a instruction, such as Vector64.AllBitsSet @@ -175,6 +176,21 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic needs consecutive registers HW_Flag_NeedsConsecutiveRegisters = 0x4000, + + // The intrinsic uses scalable registers + HW_Flag_Scalable = 0x8000, + + // Returns Per-Element Mask + // the intrinsic returns a vector containing elements that are either "all bits set" or "all bits clear" + // this output can be used as a per-element mask + HW_Flag_ReturnsPerElementMask = 0x10000, + + // The intrinsic uses a mask in arg1 to select elements present in the result + HW_Flag_MaskedOperation = 0x20000, + + // The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low register. + HW_Flag_LowMaskedOperation = 0x40000, + #else #error Unsupported platform #endif @@ -654,10 +670,8 @@ struct HWIntrinsicInfo static bool ReturnsPerElementMask(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) return (flags & HW_Flag_ReturnsPerElementMask) != 0; -#elif defined(TARGET_ARM64) - unreached(); #else #error Unsupported platform #endif @@ -848,6 +862,25 @@ struct HWIntrinsicInfo const HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_HasImmediateOperand) != 0; } + + static bool IsScalable(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_Scalable) != 0; + } + + static bool IsMaskedOperation(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return ((flags & HW_Flag_MaskedOperation) != 0) || IsLowMaskedOperation(id); + } + + static bool IsLowMaskedOperation(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_LowMaskedOperation) != 0; + } + #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) @@ -907,7 +940,7 @@ struct HWIntrinsic final InitializeBaseType(node); } - bool IsTableDriven() const + bool codeGenIsTableDriven() const { // TODO-Arm64-Cleanup - make more categories to the table-driven framework bool isTableDrivenCategory = category != HW_Category_Helper; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 0561ac2adadd6..5c7f796c61c90 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -280,6 +280,20 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1; break; + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: + immLowerBound = (int)SVE_PATTERN_POW2; + immUpperBound = (int)SVE_PATTERN_ALL; + break; + default: unreached(); } @@ -2179,6 +2193,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } + default: { return nullptr; @@ -2188,4 +2203,43 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return retNode; } +//------------------------------------------------------------------------ +// convertHWIntrinsicFromMask: Convert a HW instrinsic vector node to a mask +// +// Arguments: +// node -- The node to convert +// simdBaseJitType -- the base jit type of the converted node +// simdSize -- the simd size of the converted node +// +// Return Value: +// The node converted to the a mask type +// +GenTree* Compiler::convertHWIntrinsicToMask(var_types type, + GenTree* node, + CorInfoType simdBaseJitType, + unsigned simdSize) +{ + // ConvertVectorToMask uses cmpne which requires an embedded mask. + GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, + simdSize); +} + +//------------------------------------------------------------------------ +// convertHWIntrinsicFromMask: Convert a HW instrinsic mask node to a vector +// +// Arguments: +// node -- The node to convert +// type -- The type of the node to convert to +// +// Return Value: +// The node converted to the given type +// +GenTree* Compiler::convertHWIntrinsicFromMask(GenTreeHWIntrinsic* node, var_types type) +{ + assert(node->TypeGet() == TYP_MASK); + return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(), + node->GetSimdSize()); +} + #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index eba1b6f33a09c..6418b72a8f307 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -265,6 +265,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) emitSize = EA_UNKNOWN; opt = INS_OPTS_NONE; } + else if (HWIntrinsicInfo::IsScalable(intrin.id)) + { + emitSize = EA_SCALABLE; + opt = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType)); + } else { emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); @@ -276,7 +281,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) genConsumeMultiOpOperands(node); - if (intrin.IsTableDriven()) + if (intrin.codeGenIsTableDriven()) { const instruction ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType); assert(ins != INS_invalid); @@ -372,6 +377,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) emitShift(intrin.op2, op1Reg); } } + else if (intrin.category == HW_Category_EnumPattern) + { + assert(hasImmediateOperand); + + switch (intrin.numOperands) + { + case 1: + { + HWIntrinsicImmOpHelper helper(this, intrin.op1, node); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + const insSvePattern pattern = (insSvePattern)helper.ImmValue(); + GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern); + } + }; + break; + + default: + unreached(); + } + } else { assert(!hasImmediateOperand); @@ -1254,6 +1280,23 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg); break; + case NI_Sve_ConvertMaskToVector: + // PMOV would be ideal here, but it is in SVE2.1. + // Instead, use a predicated move: MOV ., /Z, #1 + GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 1, opt); + break; + + case NI_Sve_ConvertVectorToMask: + // PMOV would be ideal here, but it is in SVE2.1. + // Instead, use a compare: CMPNE ., /Z, ., #0 + GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt); + break; + + case NI_Sve_CreateTrueMaskAll: + // Must use the pattern variant, as the non-pattern varient is SVE2.1. + GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL); + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index f8263c40bb0c6..ac110c2a0e1b5 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -16,6 +16,32 @@ // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SVE Intrinsics +// Sve +HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, false, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) + +HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation) + + + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// Special intrinsics that are generated during importing or lowering + +HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_MaskedOperation) +HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) + +HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, -1, false, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index fe5428ce2365d..9d28135c92a1a 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3187,6 +3187,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: + assert(hasImmediateOperand); + assert(varTypeIsIntegral(intrin.op1)); + if (intrin.op1->IsCnsIntOrI()) + { + MakeSrcContained(node, intrin.op1); + } + break; + default: unreached(); } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2b84018338620..b2d37b9becad9 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -788,7 +788,7 @@ LinearScan::LinearScan(Compiler* theCompiler) availableFloatRegs = RBM_ALLFLOAT; availableDoubleRegs = RBM_ALLDOUBLE; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) availableMaskRegs = RBM_ALLMASK; #endif diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7e71484c8f18a..9620abbc5a782 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -51,12 +51,12 @@ RegisterType regType(T type) { return IntRegisterType; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD) else if (varTypeUsesMaskReg(type)) { return MaskRegisterType; } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // (TARGET_XARCH || TARGET_ARM64) && FEATURE_SIMD else { assert(varTypeUsesFloatReg(type)); @@ -1662,12 +1662,12 @@ class LinearScan : public LinearScanInterface PhasedVar availableIntRegs; PhasedVar availableFloatRegs; PhasedVar availableDoubleRegs; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) PhasedVar availableMaskRegs; #endif PhasedVar* availableRegs[TYP_COUNT]; -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) #define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs) #else #define allAvailableRegs (availableIntRegs | availableFloatRegs) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 52db29fd95abe..1096d7f11701c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1329,8 +1329,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou const HWIntrinsic intrin(intrinsicTree); - int srcCount = 0; - int dstCount = 0; + int srcCount = 0; + int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; if (HWIntrinsicInfo::IsMultiReg(intrin.id)) { @@ -1443,6 +1444,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op4->isContainedIntOrIImmed()); break; + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: + needBranchTargetReg = !intrin.op1->isContainedIntOrIImmed(); + break; + default: unreached(); } @@ -1531,6 +1545,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount++; } } + else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) + { + regMaskTP predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK; + srcCount += BuildOperandUses(intrin.op1, predMask); + } else if (intrinsicTree->OperIsMemoryLoadOrStore()) { srcCount += BuildAddrUses(intrin.op1); @@ -1730,6 +1749,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } return srcCount; } + else if (intrin.op2 != nullptr) { // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg @@ -1784,11 +1804,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if ((dstCount == 1) || (dstCount == 2)) { - BuildDef(intrinsicTree); + BuildDef(intrinsicTree, dstCandidates); if (dstCount == 2) { - BuildDef(intrinsicTree, RBM_NONE, 1); + BuildDef(intrinsicTree, dstCandidates, 1); } } else diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index ce038021bc525..7d76fc41d3300 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -140,6 +140,14 @@ #define REG_JUMP_THUNK_PARAM REG_R12 #define RBM_JUMP_THUNK_PARAM RBM_R12 + #define RBM_LOWMASK (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3 | RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7) + #define RBM_HIGHMASK (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15) + #define RBM_ALLMASK (RBM_LOWMASK | RBM_HIGHMASK) + + // TODO-SVE: Fix when adding predicate register allocation + #define RBM_MSK_CALLEE_SAVED (0) + #define RBM_MSK_CALLEE_TRASH (0) + // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index 8b8da6db011f5..1a9a8c4072f6b 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -63,8 +63,10 @@ DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, 16,16, 16, 4,16, VTR_FLOAT, available #if defined(TARGET_XARCH) DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, 32,32, 32, 8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) DEF_TP(SIMD64 ,"simd64" , TYP_SIMD64, 64,64, 64, 16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S|VTF_VEC) -DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) #endif // TARGET_XARCH +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +DEF_TP(MASK ,"mask" , TYP_MASK, 8, 8, 8, 2, 8, VTR_MASK, availableMaskRegs, RBM_MSK_CALLEE_SAVED, RBM_MSK_CALLEE_TRASH, VTF_S) +#endif // TARGET_XARCH || TARGET_ARM64 #endif // FEATURE_SIMD DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, 0, 0, 0, 0, 0, VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_ANY) diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 27dd5b3329574..ed57a76b6e7ad 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -321,7 +321,7 @@ inline bool varTypeUsesMaskReg(T vt) // However, we only have one type that uses VTR_MASK today // and so its quite a bit cheaper to just check that directly -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) +#if defined(FEATURE_SIMD) && (defined(TARGET_XARCH) || defined(TARGET_ARM64)) assert((TypeGet(vt) == TYP_MASK) || (varTypeRegister[TypeGet(vt)] != VTR_MASK)); return TypeGet(vt) == TYP_MASK; #else diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index afd28c2c13fd0..0a2eb4515d758 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1031,6 +1031,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs new file mode 100644 index 0000000000000..868300bf14aca --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs @@ -0,0 +1,95 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace System.Runtime.Intrinsics.Arm +{ + // Used to specify or limit the number of elements used within an method. + // Matches the field "pattern" within the Arm Architecture Reference Manual + public enum SveMaskPattern : byte + { + /// + /// POW2 + /// + LargestPowerOf2 = 0, // The largest power of 2. + + /// + /// VL1 + /// + VectorCount1 = 1, // Exactly 1 element. + + /// + /// VL2 + /// + VectorCount2 = 2, // Exactly 2 elements. + + /// + /// VL3 + /// + VectorCount3 = 3, // Exactly 3 elements. + + /// + /// VL4 + /// + VectorCount4 = 4, // Exactly 4 elements. + + /// + /// VL5 + /// + VectorCount5 = 5, // Exactly 5 elements. + + /// + /// VL6 + /// + VectorCount6 = 6, // Exactly 6 elements. + + /// + /// VL7 + /// + VectorCount7 = 7, // Exactly 7 elements. + + /// + /// VL8 + /// + VectorCount8 = 8, // Exactly 8 elements. + + /// + /// VL16 + /// + VectorCount16 = 9, // Exactly 16 elements. + + /// + /// VL32 + /// + VectorCount32 = 10, // Exactly 32 elements. + + /// + /// VL64 + /// + VectorCount64 = 11, // Exactly 64 elements. + + /// + /// VL128 + /// + VectorCount128 = 12, // Exactly 128 elements. + + /// + /// VL256 + /// + VectorCount256 = 13, // Exactly 256 elements. + + /// + /// MUL4 + /// + LargestMultipleOf4 = 29, // The largest multiple of 4. + + /// + /// MUL3 + /// + LargestMultipleOf3 = 30, // The largest multiple of 3. + + /// + /// ALL + /// + All = 31 // All available (implicitly a multiple of two). + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 3eeb40d5d9de1..fbd5ee65ca748 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -30,5 +30,170 @@ internal Arm64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } + + /// CreateTrueMaskByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskDouble : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskSByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskSingle : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskUInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b16(enum svpattern pattern) + /// PTRUE Presult.H, pattern + /// + public static unsafe Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskUInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b32(enum svpattern pattern) + /// PTRUE Presult.S, pattern + /// + public static unsafe Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + /// CreateTrueMaskUInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b64(enum svpattern pattern) + /// PTRUE Presult.D, pattern + /// + public static unsafe Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + + + + /// LoadVector : Unextended load + + /// + /// svint8_t svld1[_s8](svbool_t pg, const int8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, sbyte* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint16_t svld1[_s16](svbool_t pg, const int16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, short* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint32_t svld1[_s32](svbool_t pg, const int32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, int* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svld1[_s64](svbool_t pg, const int64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, long* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, byte* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ushort* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, uint* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ulong* address) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, float* address) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, double* address) { throw new PlatformNotSupportedException(); } + + } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 7a71144e0bc33..6ba2a2c67bc8a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -27,5 +27,170 @@ internal Arm64() { } public static new bool IsSupported { get => IsSupported; } } + + + /// CreateTrueMaskByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskByte(pattern); + + + /// CreateTrueMaskDouble : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskDouble(pattern); + + + /// CreateTrueMaskInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt16(pattern); + + + /// CreateTrueMaskInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt32(pattern); + + + /// CreateTrueMaskInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt64(pattern); + + + /// CreateTrueMaskSByte : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskSByte(pattern); + + + /// CreateTrueMaskSingle : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b8(enum svpattern pattern) + /// PTRUE Presult.B, pattern + /// + public static unsafe Vector CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskSingle(pattern); + + + /// CreateTrueMaskUInt16 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b16(enum svpattern pattern) + /// PTRUE Presult.H, pattern + /// + public static unsafe Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt16(pattern); + + + /// CreateTrueMaskUInt32 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b32(enum svpattern pattern) + /// PTRUE Presult.S, pattern + /// + public static unsafe Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt32(pattern); + + + /// CreateTrueMaskUInt64 : Set predicate elements to true + + /// + /// svbool_t svptrue_pat_b64(enum svpattern pattern) + /// PTRUE Presult.D, pattern + /// + public static unsafe Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt64(pattern); + + + + /// LoadVector : Unextended load + + /// + /// svint8_t svld1[_s8](svbool_t pg, const int8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, sbyte* address) => LoadVector(mask, address); + + /// + /// svint16_t svld1[_s16](svbool_t pg, const int16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, short* address) => LoadVector(mask, address); + + /// + /// svint32_t svld1[_s32](svbool_t pg, const int32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, int* address) => LoadVector(mask, address); + + /// + /// svint64_t svld1[_s64](svbool_t pg, const int64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, long* address) => LoadVector(mask, address); + + /// + /// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base) + /// LD1B Zresult.B, Pg/Z, [Xarray, Xindex] + /// LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, byte* address) => LoadVector(mask, address); + + /// + /// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base) + /// LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1] + /// LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ushort* address) => LoadVector(mask, address); + + /// + /// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, uint* address) => LoadVector(mask, address); + + /// + /// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, ulong* address) => LoadVector(mask, address); + + /// + /// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base) + /// LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2] + /// LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, float* address) => LoadVector(mask, address); + + /// + /// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base) + /// LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3] + /// LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVector(Vector mask, double* address) => LoadVector(mask, address); + } } diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index e74646eed09f7..77fe06ddc5c02 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4138,7 +4138,51 @@ internal Sve() { } internal Arm64() { } public static new bool IsSupported { get { throw null; } } } + + public static System.Numerics.Vector CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } + + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, sbyte* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, short* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, int* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, long* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, byte* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, ushort* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, uint* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, ulong* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, float* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, double* address) { throw null; } + } + + public enum SveMaskPattern : byte + { + LargestPowerOf2 = 0, // The largest power of 2. + VectorCount1 = 1, // 1 element. + VectorCount2 = 2, // 2 elements. + VectorCount3 = 3, // 3 elements. + VectorCount4 = 4, // 4 elements. + VectorCount5 = 5, // 5 elements. + VectorCount6 = 6, // 6 elements. + VectorCount7 = 7, // 7 elements. + VectorCount8 = 8, // 8 elements. + VectorCount16 = 9, // 16 elements. + VectorCount32 = 10, // 32 elements. + VectorCount64 = 11, // 64 elements. + VectorCount128 = 12, // 128 elements. + VectorCount256 = 13, // 256 elements. + LargestMultipleOf4 = 29, // The largest multiple of 4. + LargestMultipleOf3 = 30, // The largest multiple of 3. + All = 31 // All available (implicitly a multiple of two). + }; } namespace System.Runtime.Intrinsics.X86 { diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index fd27d93498192..5ee032e2842d6 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -2885,9 +2885,18 @@ ("SecureHashTernOpTest.template", new Dictionary { ["TestName"] = "ScheduleUpdate1_Vector128_UInt32", ["Isa"] = "Sha256", ["LoadIsa"] = "AdvSimd", ["Method"] = "ScheduleUpdate1", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "0x00112233", ["NextValueOp2"] = "0x44556677", ["NextValueOp3"] = "0x8899AABB", ["ExpectedResult"] = "{0x248F1BDF, 0x248F1BDF, 0xB303DDBA, 0xF74821FE}"}), }; -(string templateFileName, Dictionary templateData)[] SveInputs = Array.Empty<(string templateFileName, Dictionary templateData)>(); +(string templateFileName, Dictionary templateData)[] SveInputs = new [] { - //TODO-SVE: Add SVE tests + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_sbyte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_short", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_int", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_long", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_byte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ushort", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_uint", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_ulong", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), }; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template new file mode 100644 index 0000000000000..09aaf2f442e13 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template @@ -0,0 +1,203 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new LoadUnaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works + test.RunBasicScenario_Load(); + + // Validates calling via reflection works + // TODO-SVE: Enable once register allocation exists for predicates. + // test.RunReflectionScenario_Load(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class LoadUnaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray1, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op2ElementCount = Unsafe.SizeOf<{RetVectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data = new {Op2BaseType}[Op2ElementCount]; + + private DataTable _dataTable; + + public LoadUnaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + //TODO-SVE: Once register allocation exists for predicates, move loadMask into DataTable + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + loadMask, + ({Op2BaseType}*)(_dataTable.inArray1Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof(Vector<{Op2BaseType}>), typeof({Op2BaseType}*) }) + .Invoke(null, new object[] { + loadMask, + Pointer.Box(_dataTable.inArray1Ptr, typeof({Op2BaseType}*)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + Succeeded = false; + + try + { + RunBasicScenario_Load(); + } + catch (PlatformNotSupportedException) + { + Succeeded = true; + } + } + + private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef(firstOp), (uint)Unsafe.SizeOf<{RetVectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2BaseType}): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +}