From 4ea30dbfe4f51f3d5a76c3ad4d72e7844e52dde0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 13 May 2024 13:01:01 -0700 Subject: [PATCH 1/8] Add DotProduct() and DotProductBySelectedScalar() APIs --- .../Arm/Sve.PlatformNotSupported.cs | 54 +++++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 53 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 11 ++++ 3 files changed, 118 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index e4042d961a4f2..346b2c1de1f00 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -1160,6 +1160,60 @@ internal Arm64() { } /// public static unsafe Vector Divide(Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// DotProduct : Dot product + + /// + /// svint32_t svdot[_s32](svint32_t op1, svint8_t op2, svint8_t op3) + /// SDOT Ztied1.S, Zop2.B, Zop3.B + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svdot[_s64](svint64_t op1, svint16_t op2, svint16_t op3) + /// SDOT Ztied1.D, Zop2.H, Zop3.H + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svdot[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3) + /// UDOT Ztied1.S, Zop2.B, Zop3.B + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svdot[_u64](svuint64_t op1, svuint16_t op2, svuint16_t op3) + /// UDOT Ztied1.D, Zop2.H, Zop3.H + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + + + /// DotProductBySelectedScalar : Dot product + + /// + /// svint32_t svdot_lane[_s32](svint32_t op1, svint8_t op2, svint8_t op3, uint64_t imm_index) + /// SDOT Ztied1.S, Zop2.B, Zop3.B[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svdot_lane[_s64](svint64_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index) + /// SDOT Ztied1.D, Zop2.H, Zop3.H[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svdot_lane[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3, uint64_t imm_index) + /// UDOT Ztied1.S, Zop2.B, Zop3.B[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svdot_lane[_u64](svuint64_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index) + /// UDOT Ztied1.D, Zop2.H, Zop3.H[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) { throw new PlatformNotSupportedException(); } + /// FusedMultiplyAdd : Multiply-add, addend first /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 0776b8c8db8c4..b6c4be424c056 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -1217,6 +1217,59 @@ internal Arm64() { } /// public static unsafe Vector Divide(Vector left, Vector right) => Divide(left, right); + /// DotProduct : Dot product + + /// + /// svint32_t svdot[_s32](svint32_t op1, svint8_t op2, svint8_t op3) + /// SDOT Ztied1.S, Zop2.B, Zop3.B + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) => DotProduct(addend, left, right); + + /// + /// svint64_t svdot[_s64](svint64_t op1, svint16_t op2, svint16_t op3) + /// SDOT Ztied1.D, Zop2.H, Zop3.H + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) => DotProduct(addend, left, right); + + /// + /// svuint32_t svdot[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3) + /// UDOT Ztied1.S, Zop2.B, Zop3.B + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) => DotProduct(addend, left, right); + + /// + /// svuint64_t svdot[_u64](svuint64_t op1, svuint16_t op2, svuint16_t op3) + /// UDOT Ztied1.D, Zop2.H, Zop3.H + /// + public static unsafe Vector DotProduct(Vector addend, Vector left, Vector right) => DotProduct(addend, left, right); + + + /// DotProductBySelectedScalar : Dot product + + /// + /// svint32_t svdot_lane[_s32](svint32_t op1, svint8_t op2, svint8_t op3, uint64_t imm_index) + /// SDOT Ztied1.S, Zop2.B, Zop3.B[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => DotProductBySelectedScalar(addend, left, right, rightIndex); + + /// + /// svint64_t svdot_lane[_s64](svint64_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index) + /// SDOT Ztied1.D, Zop2.H, Zop3.H[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => DotProductBySelectedScalar(addend, left, right, rightIndex); + + /// + /// svuint32_t svdot_lane[_u32](svuint32_t op1, svuint8_t op2, svuint8_t op3, uint64_t imm_index) + /// UDOT Ztied1.S, Zop2.B, Zop3.B[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => DotProductBySelectedScalar(addend, left, right, rightIndex); + + /// + /// svuint64_t svdot_lane[_u64](svuint64_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index) + /// UDOT Ztied1.D, Zop2.H, Zop3.H[imm_index] + /// + public static unsafe Vector DotProductBySelectedScalar(Vector addend, Vector left, Vector right, [ConstantExpected] byte rightIndex) => DotProductBySelectedScalar(addend, left, right, rightIndex); + /// FusedMultiplyAdd : Multiply-add, addend first /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 6a094cc2bd41c..d4f4cf393cb9d 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4322,9 +4322,20 @@ internal Arm64() { } public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(long left, long right) { throw null; } public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(uint left, uint right) { throw null; } public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector Divide(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector Divide(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector DotProduct(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector DotProduct(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector DotProduct(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector DotProduct(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + + public static System.Numerics.Vector DotProductBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector DotProductBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector DotProductBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector DotProductBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } + public static System.Numerics.Vector FusedMultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector FusedMultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector FusedMultiplyAddBySelectedScalar(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } From d70d0d2fe9c2a656f42ceb370eb9ec85fa087d0e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 13 May 2024 13:47:20 -0700 Subject: [PATCH 2/8] Map API to instruction --- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 3e70c507d015a..d78995a6d49cb 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -60,6 +60,8 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) From ad17c474c019c94606709739f9692a7c0f0774d4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 13 May 2024 17:07:56 -0700 Subject: [PATCH 3/8] Fix test cases --- .../GenerateHWIntrinsicTests_Arm.cs | 28 +- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 24 + ..._SveImmTernOpFirstArgTestTemplate.template | 436 +++++++++++++++++ .../Shared/_SveImmTernOpTestTemplate.template | 12 +- .../_SveTernOpFirstArgTestTemplate.template | 452 ++++++++++++++++++ .../Shared/_SveTernOpTestTemplate.template | 100 ++-- 6 files changed, 990 insertions(+), 62 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpFirstArgTestTemplate.template create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpFirstArgTestTemplate.template diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 70fd2c920586d..b37ec3a274b17 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -144,7 +144,9 @@ ("_SveBinaryOpTestTemplate.template", "SveVecBinOpConvertTest.template",new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), ("_SveImmBinaryOpTestTemplate.template", "SveVecImmBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel }), ("_SveTernOpTestTemplate.template", "SveVecTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), + ("_SveTernOpFirstArgTestTemplate.template", "SveVecTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), + ("_SveImmTernOpFirstArgTestTemplate.template", "SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel }), ("_SveMinimalUnaryOpTestTemplate.template", "SveVecReduceUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = VecReduceOpTest_ValidationLogic }), }; @@ -3053,23 +3055,33 @@ ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_Divide_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Divide", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "Helpers.Divide(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.Divide(left[i], right[i])"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProduct_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProduct", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i)", ["ConvertFunc"] = ""}), + ("SveVecTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProduct_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProduct", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i)", ["ConvertFunc"] = ""}), + ("SveVecTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProduct_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProduct", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i)", ["ConvertFunc"] = ""}), + ("SveVecTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProduct_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProduct", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * i)", ["ConvertFunc"] = ""}), + + ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProductBySelectedScalar_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProductBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["Imm"] = "2", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm)", ["ConvertFunc"] = ""}), + ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProductBySelectedScalar_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProductBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["Imm"] = "1", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm)", ["ConvertFunc"] = ""}), + ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProductBySelectedScalar_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProductBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["Imm"] = "1", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm)", ["ConvertFunc"] = ""}), + ("SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TestName"] = "Sve_DotProductBySelectedScalar_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "DotProductBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["Imm"] = "1", ["ValidateIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm) != result[i]", ["GetIterResult"] = "Helpers.DotProduct(first[i], second, 4 * i, third, 4 * Imm)", ["ConvertFunc"] = ""}), + + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveSimpleNoOpTest.template", new Dictionary { ["TestName"] = "Sve_CreateFalseMaskByte_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateFalseMaskByte", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["ValidateIterResult"] = "result[i] != 0",}), ("SveSimpleNoOpTest.template", new Dictionary { ["TestName"] = "Sve_CreateFalseMaskDouble_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateFalseMaskDouble", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["ValidateIterResult"] = "result[i] != 0",}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index f99081bd44466..d6b561bd0fab5 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -6231,6 +6231,30 @@ public static int DotProduct(int op1, sbyte[] op2, int s, sbyte[] op3, int t) return result; } + public static ulong DotProduct(ulong op1, ushort[] op2, int s, ushort[] op3, int t) + { + ulong result = op1; + + for (int i = 0; i < 4; i++) + { + result += (ulong)((ulong)op2[s + i] * (ulong)op3[t + i]); + } + + return result; + } + + public static long DotProduct(long op1, short[] op2, int s, short[] op3, int t) + { + long result = op1; + + for (int i = 0; i < 4; i++) + { + result += (long)((long)op2[s + i] * (long)op3[t + i]); + } + + return result; + } + public static int WhileLessThanMask(int op1, int op2) { return (op1 < op2) ? 1 : 0; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpFirstArgTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpFirstArgTestTemplate.template new file mode 100644 index 0000000000000..186cb5279acb6 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpFirstArgTestTemplate.template @@ -0,0 +1,436 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new {TemplateName}TernaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + // Validates executing the test inside conditional, with op1 as falseValue + test.ConditionalSelect_Op1(); + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_FalseOp(); + // Validates executing the test inside conditional, with op3 as zero + test.ConditionalSelect_ZeroOp(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class {TemplateName}TernaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op2BaseType}[] inArray2, {Op3BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op3BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op2BaseType}> _fld2; + public {Op1VectorType}<{Op3BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario({TemplateName}TernaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, _fld3, {Imm}); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + private static readonly byte Imm = {Imm}; + + private static {Op1BaseType}[] _maskData = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _mask; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op2BaseType}> _fld2; + private {Op1VectorType}<{Op3BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _falseFld; + + private DataTable _dataTable; + + public {TemplateName}TernaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask1 = Sve.CreateTrueMask{Op1BaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op2BaseType}> loadMask2 = Sve.CreateTrueMask{Op2BaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op3BaseType}> loadMask3 = Sve.CreateTrueMask{Op3BaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(loadMask1, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask2, ({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask3, ({Op3BaseType}*)(_dataTable.inArray3Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op2BaseType}>), typeof({Op1VectorType}<{Op3BaseType}>), typeof(byte) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr), + (byte){Imm} + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op1, op2, op3, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, _fld3, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, test._fld3, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + public void ConditionalSelect_Op1() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld1); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld1); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld1); + } + + public void ConditionalSelect_FalseOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _falseFld); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _falseFld); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _falseFld); + } + public void ConditionalSelect_ZeroOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + } + [method: MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op2BaseType}> op2, {Op1VectorType}<{Op3BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + { + var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3, Imm), falseOp); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateConditionalSelectResult(mask, op1, op2, op3, falseOp, _dataTable.outArrayPtr); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op2BaseType}> secondOp, {Op1VectorType}<{Op3BaseType}> thirdOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") + { + {Op1BaseType}[] mask = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] first = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] second = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] third = new {Op3BaseType}[Op3ElementCount]; + {Op1BaseType}[] falseVal = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] result = new {RetBaseType}[RetElementCount]; + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref mask[0]), maskOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref first[0]), firstOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref second[0]), secondOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref third[0]), thirdOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref falseVal[0]), falseOp); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref result[0]), ref Unsafe.AsRef(output), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + bool succeeded = true; + {TemplateValidationLogicForCndSel} + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op2BaseType}>, {Op1VectorType}<{Op3BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" mask: ({string.Join(", ", mask)})"); + TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" third: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" falseOp: ({string.Join(", ", falseVal)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op2BaseType}> op2, {Op1VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] first, {Op2BaseType}[] second, {Op3BaseType}[] third, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + {TemplateValidationLogic} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template index bc7fafb5b24e9..3378f32c9a952 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template @@ -149,9 +149,9 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp3}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); return testStruct; @@ -193,15 +193,15 @@ namespace JIT.HardwareIntrinsics.Arm Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp3}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } - for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp3}; } _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); } diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpFirstArgTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpFirstArgTestTemplate.template new file mode 100644 index 0000000000000..1ec3f6d9d2c54 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpFirstArgTestTemplate.template @@ -0,0 +1,452 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new {TemplateName}TernaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + // Validates executing the test inside conditional, with op1 as falseValue + test.ConditionalSelect_Op1(); + + // Validates executing the test inside conditional, with op3 as falseValue + test.ConditionalSelect_FalseOp(); + + // Validates executing the test inside conditional, with op3 as zero + test.ConditionalSelect_ZeroOp(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class {TemplateName}TernaryOpTest__{TestName} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op2BaseType}[] inArray2, {Op3BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op3BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op2BaseType}> _fld2; + public {Op1VectorType}<{Op3BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario({TemplateName}TernaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _maskData = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _mask; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op2BaseType}> _fld2; + private {Op1VectorType}<{Op3BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _falseFld; + + private DataTable _dataTable; + + public {TemplateName}TernaryOpTest__{TestName}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + {Op1VectorType}<{Op1BaseType}> loadMask1 = Sve.CreateTrueMask{Op1BaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op2BaseType}> loadMask2 = Sve.CreateTrueMask{Op2BaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op3BaseType}> loadMask3 = Sve.CreateTrueMask{Op3BaseType}(SveMaskPattern.All); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(loadMask1, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask2, ({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask3, ({Op3BaseType}*)(_dataTable.inArray3Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op2BaseType}>), typeof({Op1VectorType}<{Op3BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op1, op2, op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void ConditionalSelect_Op1() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _fld1); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _fld1); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_Op1_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _fld1); + } + + public void ConditionalSelect_FalseOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, _falseFld); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, _falseFld); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_FalseOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, _falseFld); + } + + public void ConditionalSelect_ZeroOp() + { + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_mask"); + ConditionalSelectScenario(_mask, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_zero"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.Zero, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + + TestLibrary.TestFramework.BeginScenario("ConditionalSelect_ZeroOp_all"); + ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}>.AllBitsSet, _fld1, _fld2, _fld3, {Op1VectorType}<{Op1BaseType}>.Zero); + } + + [method: MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op2BaseType}> op2, {Op1VectorType}<{Op3BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + { + var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3), falseOp); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateConditionalSelectResult(mask, op1, op2, op3, falseOp, _dataTable.outArrayPtr); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op2BaseType}> secondOp, {Op1VectorType}<{Op3BaseType}> thirdOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") + { + {Op1BaseType}[] mask = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] first = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] second = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] third = new {Op3BaseType}[Op3ElementCount]; + {Op1BaseType}[] falseVal = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] result = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref mask[0]), maskOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref first[0]), firstOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref second[0]), secondOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref third[0]), thirdOp); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref falseVal[0]), falseOp); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref result[0]), ref Unsafe.AsRef(output), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + bool succeeded = true; + + {TemplateValidationLogicForCndSel} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" mask: ({string.Join(", ", mask)})"); + TestLibrary.TestFramework.LogInformation($" first: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($" second: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" third: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" falseOp: ({string.Join(", ", falseVal)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op2BaseType}> op2, {Op1VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] first, {Op2BaseType}[] second, {Op3BaseType}[] third, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + {TemplateValidationLogic} + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op2BaseType}>, {Op1VectorType}<{Op3BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", first)})"); + TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", second)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", third)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template index 86eaa7a453da0..044643bfa5fe6 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveTernOpTestTemplate.template @@ -95,11 +95,11 @@ namespace JIT.HardwareIntrinsics.Arm private ulong alignment; - public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + public DataTable({Op1BaseType}[] inArray1, {Op2BaseType}[] inArray2, {Op3BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) { int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); - int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); - int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op3BaseType}>(); int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); if ((alignment != 64 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) { @@ -119,8 +119,8 @@ namespace JIT.HardwareIntrinsics.Arm this.alignment = (ulong)alignment; Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); - Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); - Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); } public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); @@ -145,8 +145,8 @@ namespace JIT.HardwareIntrinsics.Arm private struct TestStruct { public {Op1VectorType}<{Op1BaseType}> _fld1; - public {Op1VectorType}<{Op1BaseType}> _fld2; - public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op1VectorType}<{Op2BaseType}> _fld2; + public {Op1VectorType}<{Op3BaseType}> _fld3; public static TestStruct Create() { @@ -154,10 +154,10 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); return testStruct; } @@ -174,17 +174,19 @@ namespace JIT.HardwareIntrinsics.Arm private static readonly int LargestVectorSize = {LargestVectorSize}; private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType}); private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); private static {Op1BaseType}[] _maskData = new {Op1BaseType}[Op1ElementCount]; private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; - private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; - private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount]; private {Op1VectorType}<{Op1BaseType}> _mask; private {Op1VectorType}<{Op1BaseType}> _fld1; - private {Op1VectorType}<{Op1BaseType}> _fld2; - private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op1VectorType}<{Op2BaseType}> _fld2; + private {Op1VectorType}<{Op3BaseType}> _fld3; private {Op1VectorType}<{Op1BaseType}> _falseFld; private DataTable _dataTable; @@ -197,16 +199,16 @@ namespace JIT.HardwareIntrinsics.Arm Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } - for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); } @@ -220,8 +222,8 @@ namespace JIT.HardwareIntrinsics.Arm var result = {Isa}.{Method}( Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr) + Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) ); Unsafe.Write(_dataTable.outArrayPtr, result); @@ -232,12 +234,14 @@ namespace JIT.HardwareIntrinsics.Arm { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); - {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op1BaseType}> loadMask1 = Sve.CreateTrueMask{Op1BaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op2BaseType}> loadMask2 = Sve.CreateTrueMask{Op2BaseType}(SveMaskPattern.All); + {Op1VectorType}<{Op3BaseType}> loadMask3 = Sve.CreateTrueMask{Op3BaseType}(SveMaskPattern.All); var result = {Isa}.{Method}( - {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), - {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray2Ptr)), - {LoadIsa}.Load{Op1VectorType}(loadMask, ({Op1BaseType}*)(_dataTable.inArray3Ptr)) + {LoadIsa}.Load{Op1VectorType}(loadMask1, ({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask2, ({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op1VectorType}(loadMask3, ({Op3BaseType}*)(_dataTable.inArray3Ptr)) ); Unsafe.Write(_dataTable.outArrayPtr, result); @@ -248,11 +252,11 @@ namespace JIT.HardwareIntrinsics.Arm { TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); - var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op1BaseType}>) }) + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op1VectorType}<{Op2BaseType}>), typeof({Op1VectorType}<{Op3BaseType}>) }) .Invoke(null, new object[] { Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr) + Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) }); Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); @@ -264,8 +268,8 @@ namespace JIT.HardwareIntrinsics.Arm TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); - var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); - var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr); var result = {Isa}.{Method}(op1, op2, op3); Unsafe.Write(_dataTable.outArrayPtr, result); @@ -362,7 +366,7 @@ namespace JIT.HardwareIntrinsics.Arm } [method: MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) + private void ConditionalSelectScenario({Op1VectorType}<{Op1BaseType}> mask, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op2BaseType}> op2, {Op1VectorType}<{Op3BaseType}> op3, {Op1VectorType}<{Op1BaseType}> falseOp) { var result = Sve.ConditionalSelect(mask, {Isa}.{Method}(op1, op2, op3), falseOp); @@ -391,12 +395,12 @@ namespace JIT.HardwareIntrinsics.Arm } } - private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op1BaseType}> secondOp, {Op1VectorType}<{Op1BaseType}> thirdOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") + private void ValidateConditionalSelectResult({Op1VectorType}<{Op1BaseType}> maskOp, {Op1VectorType}<{Op1BaseType}> firstOp, {Op1VectorType}<{Op2BaseType}> secondOp, {Op1VectorType}<{Op3BaseType}> thirdOp, {Op1VectorType}<{Op1BaseType}> falseOp, void* output, [CallerMemberName] string method = "") { {Op1BaseType}[] mask = new {Op1BaseType}[Op1ElementCount]; {Op1BaseType}[] first = new {Op1BaseType}[Op1ElementCount]; - {Op1BaseType}[] second = new {Op1BaseType}[Op1ElementCount]; - {Op1BaseType}[] third = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] second = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] third = new {Op3BaseType}[Op3ElementCount]; {Op1BaseType}[] falseVal = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] result = new {RetBaseType}[RetElementCount]; @@ -426,16 +430,16 @@ namespace JIT.HardwareIntrinsics.Arm } } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op2BaseType}> op2, {Op1VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; - {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; - {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op1ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); - Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); - Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); ValidateResult(inArray1, inArray2, inArray3, outArray, method); @@ -444,19 +448,19 @@ namespace JIT.HardwareIntrinsics.Arm private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; - {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; - {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op1ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op3BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); ValidateResult(inArray1, inArray2, inArray3, outArray, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") { bool succeeded = true; @@ -464,7 +468,7 @@ namespace JIT.HardwareIntrinsics.Arm if (!succeeded) { - TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op2BaseType}>, {Op1VectorType}<{Op3BaseType}>): {method} failed:"); TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})"); TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); From 470ce6ccc576ba00f90aa6769d0c829cec5ed076 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 11:51:26 -0700 Subject: [PATCH 4/8] Make DotProductBySelectedScalar work --- src/coreclr/jit/codegenarm64test.cpp | 24 +++++++++++------------ src/coreclr/jit/emitarm64sve.cpp | 13 ++++++------ src/coreclr/jit/hwintrinsicarm64.cpp | 11 +++++++++++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 4 ++-- src/coreclr/jit/lsraarm64.cpp | 15 ++++++++++++++ 5 files changed, 46 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 8c43a2972c1db..b7247cb0c7503 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6994,23 +6994,23 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_EY_3A theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V9, REG_V10, REG_V4, 0, - INS_OPTS_SCALABLE_B); // SDOT .S, .B, .B[] + INS_OPTS_SCALABLE_S); // SDOT .S, .B, .B[] theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V11, REG_V12, REG_V5, 1, - INS_OPTS_SCALABLE_B); // SDOT .S, .B, .B[] + INS_OPTS_SCALABLE_S); // SDOT .S, .B, .B[] theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V13, REG_V14, REG_V6, 2, - INS_OPTS_SCALABLE_B); // UDOT .S, .B, .B[] + INS_OPTS_SCALABLE_S); // UDOT .S, .B, .B[] theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V15, REG_V16, REG_V7, 3, - INS_OPTS_SCALABLE_B); // UDOT .S, .B, .B[] + INS_OPTS_SCALABLE_S); // UDOT .S, .B, .B[] // IF_SVE_EY_3B - theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V0, REG_V1, REG_V0, - 0); // SDOT .D, .H, .H[] - theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, - 1); // SDOT .D, .H, .H[] - theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, - 0); // UDOT .D, .H, .H[] - theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, - 1); // UDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, + INS_OPTS_SCALABLE_D); // SDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, + INS_OPTS_SCALABLE_D); // SDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, + INS_OPTS_SCALABLE_D); // UDOT .D, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, + INS_OPTS_SCALABLE_D); // UDOT .D, .H, .H[] // IF_SVE_EZ_3A theEmitter->emitIns_R_R_R_I(INS_sve_sudot, EA_SCALABLE, REG_V17, REG_V18, REG_V0, 0, diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 1c596ff1f21ae..4f6275fa71f3c 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -4494,21 +4494,20 @@ void emitter::emitInsSve_R_R_R_I(instruction ins, assert(isVectorRegister(reg2)); // nnnnn assert(isLowVectorRegister(reg3)); // mmmm - if (opt == INS_OPTS_SCALABLE_B) + if (opt == INS_OPTS_SCALABLE_H) { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm<2>(imm)); // ii - fmt = IF_SVE_EY_3A; + assert(isValidUimm<2>(imm)); // ii + fmt = IF_SVE_EG_3A; } - else if (opt == INS_OPTS_SCALABLE_H) + else if (opt == INS_OPTS_SCALABLE_S) { assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm assert(isValidUimm<2>(imm)); // ii - fmt = IF_SVE_EG_3A; + fmt = IF_SVE_EY_3A; } else { - assert(insOptsNone(opt)); + assert(opt == INS_OPTS_SCALABLE_D); assert(isValidUimm<1>(imm)); // i opt = INS_OPTS_SCALABLE_H; fmt = IF_SVE_EY_3B; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index ce3d6e5e69fa2..4fb3716d9cdf6 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -327,6 +327,17 @@ void Compiler::getHWIntrinsicImmTypes(NamedIntrinsic intrinsic, // The second source operand of sdot, udot instructions is an indexed 32-bit element. indexedElementBaseType = simdBaseType; } + + if (intrinsic == NI_Sve_DotProductBySelectedScalar) + { + assert(((simdBaseType == TYP_INT) && (indexedElementBaseType == TYP_BYTE)) || + ((simdBaseType == TYP_UINT) && (indexedElementBaseType == TYP_UBYTE)) || + ((simdBaseType == TYP_LONG) && (indexedElementBaseType == TYP_SHORT)) || + ((simdBaseType == TYP_ULONG) && (indexedElementBaseType == TYP_USHORT))); + + // The second source operand of sdot, udot instructions is an indexed 32-bit element. + indexedElementBaseType = simdBaseType; + } } assert(indexedElementBaseType == simdBaseType); diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index d78995a6d49cb..b6415f9202d14 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -60,8 +60,8 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 264bc70b0a74d..de363c8645a2e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1881,6 +1881,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; } } + else if (intrin.id == NI_Sve_DotProductBySelectedScalar) + { + unsigned baseElementSize = genTypeSize(intrin.baseType); + // If this is common pattern, then we will add a flag in the table, but for now, just check for specific + // intrinsics + if (baseElementSize == 8) + { + candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; + } + else + { + assert(baseElementSize == 4); + candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; + } + } if ((intrin.id == NI_Sve_ConditionalSelect) && (intrin.op2->IsEmbMaskOp()) && (intrin.op2->isRMWHWIntrinsic(compiler))) From ecab48369a8e95d65db890c82182e09555018450 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 11:52:52 -0700 Subject: [PATCH 5/8] Fix mov unit tests --- src/coreclr/jit/codegenarm64test.cpp | 12 ++++++------ src/coreclr/jit/emitarm64.cpp | 10 ++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index b7247cb0c7503..31e2765e73706 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4998,7 +4998,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_PREDICATE_MERGE); /* MOV .B, /M, .B */ // IF_SVE_CZ_4A_L - theEmitter->emitIns_R_R(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P15, INS_OPTS_SCALABLE_B); /* MOV .B, .B + theEmitter->emitIns_Mov(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P15, /* canSkip */ false, INS_OPTS_SCALABLE_B); /* MOV .B, .B */ // IF_SVE_DA_4A @@ -8892,15 +8892,15 @@ void CodeGen::genArm64EmitterUnitTestsSve() // DUP ., theEmitter->emitIns_R_R(INS_sve_dup, EA_8BYTE, REG_V4, REG_SP, INS_OPTS_SCALABLE_D); // MOV ., - theEmitter->emitIns_R_R(INS_sve_mov, EA_4BYTE, REG_V4, REG_R2, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_Mov(INS_sve_mov, EA_4BYTE, REG_V4, REG_R2, /* canSkip */ false, INS_OPTS_SCALABLE_B); // MOV ., - theEmitter->emitIns_R_R(INS_sve_mov, EA_4BYTE, REG_V4, REG_R2, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_Mov(INS_sve_mov, EA_4BYTE, REG_V4, REG_R2, /* canSkip */ false, INS_OPTS_SCALABLE_H); // MOV ., - theEmitter->emitIns_R_R(INS_sve_mov, EA_4BYTE, REG_V1, REG_R3, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_Mov(INS_sve_mov, EA_4BYTE, REG_V1, REG_R3, /* canSkip */ false, INS_OPTS_SCALABLE_S); // MOV ., - theEmitter->emitIns_R_R(INS_sve_mov, EA_8BYTE, REG_V5, REG_SP, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_Mov(INS_sve_mov, EA_8BYTE, REG_V5, REG_SP, /* canSkip */ false, INS_OPTS_SCALABLE_D); // MOV ., - theEmitter->emitIns_R_R(INS_sve_mov, EA_8BYTE, REG_V2, REG_R9, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_Mov(INS_sve_mov, EA_8BYTE, REG_V2, REG_R9, /* canSkip */ false, INS_OPTS_SCALABLE_D); // IF_SVE_BJ_2A // FEXPA ., . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 091de211cb013..cc19890aa0e3a 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4275,6 +4275,16 @@ void emitter::emitIns_Mov( } fmt = IF_SVE_AU_3A; } + else if (isVectorRegister(dstReg) && isGeneralRegisterOrSP(srcReg)) + { + assert(insOptsScalable(opt)); + if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) + { + return; + } + srcReg = encodingSPtoZR(srcReg); + fmt = IF_SVE_CB_2A; + } else { unreached(); From d53327504164a9e8e4227bc8e27c7343b60099aa Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 12:54:03 -0700 Subject: [PATCH 6/8] Add LowVector flag --- src/coreclr/jit/hwintrinsic.h | 11 +++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 8 +- src/coreclr/jit/lsra.h | 1 + src/coreclr/jit/lsraarm64.cpp | 96 ++++++++++++++--------- 4 files changed, 77 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 2f7690c091e9f..16c5b770ada4a 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -237,6 +237,11 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic is a FusedMultiplyAdd intrinsic HW_Flag_FmaIntrinsic = 0x20000000, +#if defined(TARGET_ARM64) + // The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register. + HW_Flag_LowVectorOperation = 0x4000000, +#endif + HW_Flag_CanBenefitFromConstantProp = 0x80000000, }; @@ -891,6 +896,12 @@ struct HWIntrinsicInfo return (flags & HW_Flag_LowMaskedOperation) != 0; } + static bool IsLowVectorOperation(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_LowVectorOperation) != 0; + } + static bool IsOptionalEmbeddedMaskedOperation(NamedIntrinsic id) { const HWIntrinsicFlag flags = lookupFlags(id); diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index b6415f9202d14..00b0739604b3c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -61,12 +61,12 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic) +HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic) +HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) @@ -103,7 +103,7 @@ HARDWARE_INTRINSIC(Sve, MinNumber, HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand) +HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Negate, -1, -1, false, {INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_neg, INS_invalid, INS_sve_fneg, INS_sve_fneg}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a80bf94c770be..eb73f39e6497f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1432,6 +1432,7 @@ class LinearScan : public LinearScanInterface return nextConsecutiveRefPositionMap; } FORCEINLINE RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); + void getLowVectorOperandAndCandidates(HWIntrinsic intrin, int* operandNum, regMaskTP* candidates); #endif #ifdef DEBUG diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index de363c8645a2e..df06a82e2c307 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1843,7 +1843,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op1 != nullptr); bool forceOp2DelayFree = false; - regMaskTP candidates = RBM_NONE; + regMaskTP lowVectorCandidates = RBM_NONE; + int lowVectorOperandNum = -1; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) { if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal())) @@ -1865,36 +1866,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou compiler->getSIMDInitTempVarNum(requiredSimdTempType); } } - - if ((intrin.id == NI_Sve_FusedMultiplyAddBySelectedScalar) || - (intrin.id == NI_Sve_FusedMultiplySubtractBySelectedScalar)) - { - // If this is common pattern, then we will add a flag in the table, but for now, just check for specific - // intrinsics - if (intrin.baseType == TYP_DOUBLE) - { - candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; - } - else - { - assert(intrin.baseType == TYP_FLOAT); - candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; - } - } - else if (intrin.id == NI_Sve_DotProductBySelectedScalar) + else if (HWIntrinsicInfo::IsLowVectorOperation(intrin.id)) { - unsigned baseElementSize = genTypeSize(intrin.baseType); - // If this is common pattern, then we will add a flag in the table, but for now, just check for specific - // intrinsics - if (baseElementSize == 8) - { - candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; - } - else - { - assert(baseElementSize == 4); - candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; - } + getLowVectorOperandAndCandidates(intrin, &lowVectorOperandNum, &lowVectorCandidates); } if ((intrin.id == NI_Sve_ConditionalSelect) && (intrin.op2->IsEmbMaskOp()) && @@ -1905,33 +1879,45 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou GenTreeHWIntrinsic* intrinEmbOp2 = intrin.op2->AsHWIntrinsic(); size_t numArgs = intrinEmbOp2->GetOperandCount(); assert((numArgs == 1) || (numArgs == 2)); + const HWIntrinsic intrinEmb(intrinEmbOp2); + if (HWIntrinsicInfo::IsLowVectorOperation(intrinEmb.id)) + { + getLowVectorOperandAndCandidates(intrinEmb, &lowVectorOperandNum, &lowVectorCandidates); + } + tgtPrefUse = BuildUse(intrinEmbOp2->Op(1)); srcCount += 1; for (size_t argNum = 2; argNum <= numArgs; argNum++) { - srcCount += BuildDelayFreeUses(intrinEmbOp2->Op(argNum), intrinEmbOp2->Op(1)); + srcCount += BuildDelayFreeUses(intrinEmbOp2->Op(argNum), intrinEmbOp2->Op(1), + (argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE); } } else { + regMaskTP candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE; if (forceOp2DelayFree) { - srcCount += BuildDelayFreeUses(intrin.op2); + srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates); } else { - srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1) : BuildOperandUses(intrin.op2); + srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates) + : BuildOperandUses(intrin.op2, candidates); } } if (intrin.op3 != nullptr) { + regMaskTP candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE; + srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates) : BuildOperandUses(intrin.op3, candidates); if (intrin.op4 != nullptr) { + assert(lowVectorOperandNum != 4); srcCount += isRMW ? BuildDelayFreeUses(intrin.op4, intrin.op1) : BuildOperandUses(intrin.op4); } } @@ -2222,8 +2208,48 @@ bool RefPosition::isLiveAtConsecutiveRegistersLoc(LsraLocation consecutiveRegist } return false; } -#endif +#endif // DEBUG -#endif +//------------------------------------------------------------------------ +// getLowVectorOperandAndCandidates: Instructions for certain intrinsics operate on low vector registers +// depending on the size of the element. The method returns the candidates based on that size and +// the operand number of the intrinsics that has the restriction. +// +// Arguments: +// intrin - Intrinsics +// operandNum (out) - The operand number having the low vector register restriction +// candidates (out) - The restricted low vector registers +// +void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, int* operandNum, regMaskTP* candidates) +{ + assert(HWIntrinsicInfo::IsLowVectorOperation(intrin.id)); + unsigned baseElementSize = genTypeSize(intrin.baseType); + + if (baseElementSize == 8) + { + *candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; + } + else + { + assert(baseElementSize == 4); + *candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; + } + + switch (intrin.id) + { + case NI_Sve_DotProductBySelectedScalar: + case NI_Sve_FusedMultiplyAddBySelectedScalar: + case NI_Sve_FusedMultiplySubtractBySelectedScalar: + *operandNum = 3; + break; + case NI_Sve_MultiplyBySelectedScalar: + *operandNum = 2; + break; + default: + unreached(); + } +} + +#endif // FEATURE_HW_INTRINSICS #endif // TARGET_ARM64 From 636b73102126f60e248cc815caa3ffc1eac648ee Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 14:14:40 -0700 Subject: [PATCH 7/8] jit format --- src/coreclr/jit/codegenarm64test.cpp | 5 +++-- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 31e2765e73706..8cf6a5181ef8b 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4998,8 +4998,9 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_PREDICATE_MERGE); /* MOV .B, /M, .B */ // IF_SVE_CZ_4A_L - theEmitter->emitIns_Mov(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P15, /* canSkip */ false, INS_OPTS_SCALABLE_B); /* MOV .B, .B - */ + theEmitter->emitIns_Mov(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P15, /* canSkip */ false, + INS_OPTS_SCALABLE_B); /* MOV .B, .B + */ // IF_SVE_DA_4A theEmitter->emitIns_R_R_R_R(INS_sve_brkpa, EA_SCALABLE, REG_P0, REG_P1, REG_P10, REG_P15, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index cc19890aa0e3a..abe40f25937f6 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4283,7 +4283,7 @@ void emitter::emitIns_Mov( return; } srcReg = encodingSPtoZR(srcReg); - fmt = IF_SVE_CB_2A; + fmt = IF_SVE_CB_2A; } else { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index df06a82e2c307..007ba7ac25144 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1842,7 +1842,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op1 != nullptr); - bool forceOp2DelayFree = false; + bool forceOp2DelayFree = false; regMaskTP lowVectorCandidates = RBM_NONE; int lowVectorOperandNum = -1; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) From 6e076fc98cae73eae7eb62bced59979aa249087d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 11:50:34 -0700 Subject: [PATCH 8/8] fix the build error --- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index eb73f39e6497f..7d1624a578045 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1432,7 +1432,7 @@ class LinearScan : public LinearScanInterface return nextConsecutiveRefPositionMap; } FORCEINLINE RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); - void getLowVectorOperandAndCandidates(HWIntrinsic intrin, int* operandNum, regMaskTP* candidates); + void getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, regMaskTP* candidates); #endif #ifdef DEBUG diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 007ba7ac25144..c133a4a691776 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1844,7 +1844,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou bool forceOp2DelayFree = false; regMaskTP lowVectorCandidates = RBM_NONE; - int lowVectorOperandNum = -1; + size_t lowVectorOperandNum = 0; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) { if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal())) @@ -2220,7 +2220,7 @@ bool RefPosition::isLiveAtConsecutiveRegistersLoc(LsraLocation consecutiveRegist // operandNum (out) - The operand number having the low vector register restriction // candidates (out) - The restricted low vector registers // -void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, int* operandNum, regMaskTP* candidates) +void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, regMaskTP* candidates) { assert(HWIntrinsicInfo::IsLowVectorOperation(intrin.id)); unsigned baseElementSize = genTypeSize(intrin.baseType);