Skip to content

Commit

Permalink
Reland: [LV]: Teach LV to recursively (de)interleave. (#122989)
Browse files Browse the repository at this point in the history
This commit relands the changes from "[LV]: Teach LV to recursively
(de)interleave. #89018"

Reason for revert:
- The patch exposed a bug in the IA pass; that bug has now been fixed, and the fix landed in #122643.
  • Loading branch information
hassnaaHamdi authored Jan 17, 2025
1 parent e79bb87 commit 9491f75
Show file tree
Hide file tree
Showing 6 changed files with 1,387 additions and 671 deletions.
14 changes: 7 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3505,10 +3505,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
if (hasIrregularType(ScalarTy, DL))
return false;

// We currently only know how to emit interleave/deinterleave with
// Factor=2 for scalable vectors. This is purely an implementation
// limit.
if (VF.isScalable() && InterleaveFactor != 2)
// For scalable vectors, the interleave factor must currently be a power of
// 2, since we rely on the (de)interleave2 intrinsics instead of
// shufflevectors.
if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
return false;

// If the group involves a non-integral pointer, we may not be able to
Expand Down Expand Up @@ -9435,9 +9435,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
// For scalable vectors, the only interleave factor currently supported
// is 2 since we require the (de)interleave2 intrinsics instead of
// shufflevectors.
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
// must be a power of 2 since we require the (de)interleave2 intrinsics
// instead of shufflevectors.
assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
"Unsupported interleave factor for scalable vectors");
return Result;
};
Expand Down
79 changes: 56 additions & 23 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2863,10 +2863,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
// must use intrinsics to interleave.
if (VecTy->isScalableTy()) {
VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
Vals,
/*FMFSource=*/nullptr, Name);
assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
"scalable vectors, must be power of 2");
SmallVector<Value *> InterleavingValues(Vals);
// When interleaving, the number of values is halved at each step until
// only the single final interleaved value remains.
auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
for (unsigned I = 0; I < Midpoint; ++I)
InterleavingValues[I] = Builder.CreateIntrinsic(
InterleaveTy, Intrinsic::vector_interleave2,
{InterleavingValues[I], InterleavingValues[Midpoint + I]},
/*FMFSource=*/nullptr, Name);
}
return InterleavingValues[0];
}

// Fixed length. Start by concatenating all vectors into a wide vector.
Expand Down Expand Up @@ -2952,15 +2963,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
&InterleaveFactor](Value *MaskForGaps) -> Value * {
if (State.VF.isScalable()) {
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
assert(InterleaveFactor == 2 &&
assert(isPowerOf2_32(InterleaveFactor) &&
"Unsupported deinterleave factor for scalable vectors");
auto *ResBlockInMask = State.get(BlockInMask);
SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
State.VF.getKnownMinValue() * 2, true);
return State.Builder.CreateIntrinsic(
MaskTy, Intrinsic::vector_interleave2, Ops,
/*FMFSource=*/nullptr, "interleaved.mask");
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
return interleaveVectors(State.Builder, Ops, "interleaved.mask");
}

if (!BlockInMask)
Expand Down Expand Up @@ -3000,22 +3007,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
ArrayRef<VPValue *> VPDefs = definedValues();
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
if (VecTy->isScalableTy()) {
assert(InterleaveFactor == 2 &&
assert(isPowerOf2_32(InterleaveFactor) &&
"Unsupported deinterleave factor for scalable vectors");

// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
Value *DI = State.Builder.CreateIntrinsic(
Intrinsic::vector_deinterleave2, VecTy, NewLoad,
/*FMFSource=*/nullptr, "strided.vec");
unsigned J = 0;
for (unsigned I = 0; I < InterleaveFactor; ++I) {
Instruction *Member = Group->getMember(I);
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
DeinterleavedValues[0] = NewLoad;
// For InterleaveFactor > 2 we have to deinterleave recursively, because
// the currently available deinterleave intrinsic only supports a factor
// of 2. For a factor of exactly 2, the loop below exits after its first
// iteration.
// When deinterleaving, the number of values doubles on each iteration
// until we have "InterleaveFactor" of them.
for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
NumVectors *= 2) {
// Deinterleave the elements within the vector
SmallVector<Value *> TempDeinterleavedValues(NumVectors);
for (unsigned I = 0; I < NumVectors; ++I) {
auto *DiTy = DeinterleavedValues[I]->getType();
TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
/*FMFSource=*/nullptr, "strided.vec");
}
// Extract the deinterleaved values:
for (unsigned I = 0; I < 2; ++I)
for (unsigned J = 0; J < NumVectors; ++J)
DeinterleavedValues[NumVectors * I + J] =
State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
}

if (!Member)
#ifndef NDEBUG
for (Value *Val : DeinterleavedValues)
assert(Val && "NULL Deinterleaved Value");
#endif
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
Instruction *Member = Group->getMember(I);
Value *StridedVec = DeinterleavedValues[I];
if (!Member) {
// There is no member at this index, so the deinterleaved value is unused.
cast<Instruction>(StridedVec)->eraseFromParent();
continue;

Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
}
// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {
VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
Expand Down
Loading

0 comments on commit 9491f75

Please sign in to comment.