Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support mulx returning ValueTuple #37928

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,179 changes: 4,179 additions & 0 deletions build-tests.out

Large diffs are not rendered by default.

2,351 changes: 2,351 additions & 0 deletions build.out

Large diffs are not rendered by default.

1,299 changes: 1,299 additions & 0 deletions libs.out

Large diffs are not rendered by default.

23 changes: 0 additions & 23 deletions src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19055,29 +19055,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() const
{
return true;
}
#ifdef TARGET_XARCH
else if (HWIntrinsicInfo::MaybeMemoryStore(gtHWIntrinsicId) &&
(category == HW_Category_IMM || category == HW_Category_Scalar))
{
// Some intrinsics (without HW_Category_MemoryStore) also have MemoryStore semantics

// Bmi2/Bmi2.X64.MultiplyNoFlags may return the lower half result by a out argument
// unsafe ulong MultiplyNoFlags(ulong left, ulong right, ulong* low)
//
// So, the 3-argument form is MemoryStore
if (HWIntrinsicInfo::lookupNumArgs(this) == 3)
{
switch (gtHWIntrinsicId)
{
case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags:
return true;
default:
return false;
}
}
}
#endif // TARGET_XARCH
#endif // TARGET_XARCH || TARGET_ARM64
return false;
}
Expand Down
22 changes: 17 additions & 5 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -785,14 +785,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

if ((retType == TYP_STRUCT) && featureSIMD)
{
unsigned int sizeBytes;
baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
retType = getSIMDTypeForSize(sizeBytes);
assert(sizeBytes != 0);
unsigned int sizeBytes = 0;
baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
bool checkBaseType = true;
#ifdef TARGET_XARCH
if (sizeBytes == 0)
{
assert((intrinsic == NI_BMI2_MultiplyNoFlags2) || (intrinsic == NI_BMI2_X64_MultiplyNoFlags2));
assert(retType == TYP_STRUCT);
checkBaseType = false;
}
else
#endif
{
retType = getSIMDTypeForSize(sizeBytes);
assert(retType != TYP_STRUCT);
}

// We want to return early here for cases where retType was TYP_STRUCT as per method signature and
// rather than deferring the decision after getting the baseType of arg.
if (!isSupportedBaseType(intrinsic, baseType))
if (checkBaseType && !isSupportedBaseType(intrinsic, baseType))
{
return nullptr;
}
Expand Down
68 changes: 28 additions & 40 deletions src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1851,8 +1851,12 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
var_types targetType = node->TypeGet();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType);
emitter* emit = GetEmitter();
if ((intrinsicId == NI_BMI2_MultiplyNoFlags2) || (intrinsicId == NI_BMI2_X64_MultiplyNoFlags2))
{
targetType = op1->TypeGet();
}
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType);
emitter* emit = GetEmitter();

assert(targetReg != REG_NA);
assert(op1 != nullptr);
Expand Down Expand Up @@ -1901,60 +1905,44 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
}

case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_MultiplyNoFlags2:
case NI_BMI2_X64_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags2:
{
int numArgs = HWIntrinsicInfo::lookupNumArgs(node);
assert(numArgs == 2 || numArgs == 3);
assert(numArgs == 2);

regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
regNumber op3Reg = REG_NA;
regNumber lowReg = REG_NA;
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
regNumber lowReg = targetReg;
regNumber highReg = targetReg;

if (numArgs == 2)
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
if ((intrinsicId == NI_BMI2_MultiplyNoFlags2) || (intrinsicId == NI_BMI2_X64_MultiplyNoFlags2))
{
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
lowReg = targetReg;
}
else
{
GenTreeArgList* argList = op1->AsArgList();
op1 = argList->Current();
op1Reg = op1->GetRegNum();
argList = argList->Rest();
op2 = argList->Current();
op2Reg = op2->GetRegNum();
argList = argList->Rest();
GenTree* op3 = argList->Current();
op3Reg = op3->GetRegNum();
assert(!op3->isContained());
assert(op3Reg != op1Reg);
assert(op3Reg != targetReg);
assert(op3Reg != REG_EDX);
lowReg = node->GetSingleTempReg();
assert(op3Reg != lowReg);
assert(lowReg != targetReg);
highReg = node->AsHWIntrinsic()->GetOtherReg();
}

// These do not support containment
assert(!op2->isContained());
emitAttr attr = emitTypeSize(targetType);
emitAttr attr = emitTypeSize(targetType);
GenTree* explicitSrc = op2;
// mov the first operand into implicit source operand EDX/RDX
if (op1Reg != REG_EDX)
{
assert(op2Reg != REG_EDX);
emit->emitIns_R_R(INS_mov, attr, REG_EDX, op1Reg);
if (op2Reg == REG_EDX)
{
explicitSrc = op1;
}
else
{
emit->emitIns_R_R(INS_mov, attr, REG_EDX, op1Reg);
}
}

// generate code for MULX
genHWIntrinsic_R_R_RM(node, ins, attr, targetReg, lowReg, op2);

// If requires the lower half result, store in the memory pointed to by op3
if (numArgs == 3)
{
emit->emitIns_AR_R(INS_mov, attr, lowReg, op3Reg, 0);
}
genHWIntrinsic_R_R_RM(node, ins, attr, highReg, lowReg, explicitSrc);

break;
}
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,8 @@ HARDWARE_INTRINSIC(BMI1_X64, BitFieldExtract,
HARDWARE_INTRINSIC(BMI2, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to re-enable containment support with this PR also? And MaybeMemoryStore shouldn't be needed with this?

HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags2, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand All @@ -624,7 +625,8 @@ HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags,
HARDWARE_INTRINSIC(BMI2_X64, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags2, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand Down
28 changes: 28 additions & 0 deletions src/coreclr/src/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,34 @@ GenTree* Compiler::impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METH
return gtNewScalarHWIntrinsicNode(callType, op2, op1, intrinsic);
}

case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_MultiplyNoFlags2:
case NI_BMI2_X64_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags2:
{
if (sig->numArgs == 3)
{
// These are implemented in terms of NI_BMI2_MultiplyNoFlags2 or NI_BMI2_X64_MultiplyNoFlags2.
assert((intrinsic == NI_BMI2_MultiplyNoFlags) || (intrinsic == NI_BMI2_X64_MultiplyNoFlags));
{
return nullptr;
}
}
assert(sig->numArgs == 2);
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;

if (callType == TYP_STRUCT)
{
CORINFO_CLASS_HANDLE structHandle = sig->retTypeSigClass;
assert(structHandle != NO_CLASS_HANDLE);
GenTreeHWIntrinsic* result = gtNewScalarHWIntrinsicNode(TYP_STRUCT, op1, op2, intrinsic);
result->SetLayout(typGetObjLayout(structHandle));
return result;
}
return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
}

default:
return nullptr;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr,
}

assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_OBJ, GT_CALL, GT_MKREFANY, GT_RET_EXPR, GT_COMMA) ||
(src->TypeGet() != TYP_STRUCT && src->OperIsSimdOrHWintrinsic()));
src->OperIsSimdOrHWintrinsic());

var_types asgType = src->TypeGet();

Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6173,10 +6173,10 @@ bool Lowering::CheckMultiRegLclVar(GenTreeLclVar* lclNode, const ReturnTypeDesc*
}
}
#ifdef TARGET_XARCH
// For local stores on XARCH we only handle mismatched src/dest register count for
// calls of SIMD type. If the source was another lclVar similarly promoted, we would
// For local stores on XARCH we can't handle another lclVar source.
// If the source was another lclVar similarly promoted, we would
// have broken it into multiple stores.
if (lclNode->OperIs(GT_STORE_LCL_VAR) && !lclNode->gtGetOp1()->OperIs(GT_CALL))
if (lclNode->OperIs(GT_STORE_LCL_VAR) && lclNode->gtGetOp1()->OperIs(GT_LCL_VAR))
{
canEnregister = false;
}
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/src/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,16 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
#endif
}
}
else if (src->OperIsHWIntrinsic())
{
assert(!blkNode->AsObj()->GetLayout()->HasGCPtr());
if (blkNode->OperIs(GT_STORE_OBJ))
{
blkNode->SetOper(GT_STORE_BLK);
}
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
ContainBlockStoreAddress(blkNode, size, dstAddr);
}
else
{
assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD));
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -1068,7 +1068,7 @@ class LinearScan : public LinearScanInterface
var_types type = tree->TypeGet();
if (type == TYP_STRUCT)
{
assert(tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR));
assert(tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR) || tree->OperIsHWIntrinsic());
GenTreeLclVar* lclVar = tree->AsLclVar();
LclVarDsc* varDsc = compiler->lvaGetDesc(lclVar);
type = varDsc->GetRegisterType(lclVar);
Expand Down
Loading