Skip to content

Commit 5d7cf50

Browse files
committed
[SLP]Fix PR108421: Correctly deduce VF from the masks
The VF must be the maximum of the CommonMask size and the V1 Mask size in order to reshuffle the vectors correctly; otherwise, an incorrect result is generated. Fixes llvm#108421.
1 parent 95eab0d commit 5d7cf50

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12486,11 +12486,12 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1248612486
V = createShuffle(InVectors.front(), nullptr, CommonMask);
1248712487
transformMaskAfterShuffle(CommonMask, CommonMask);
1248812488
}
12489+
unsigned VF = std::max(CommonMask.size(), Mask.size());
1248912490
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
1249012491
if (CommonMask[Idx] == PoisonMaskElem && Mask[Idx] != PoisonMaskElem)
1249112492
CommonMask[Idx] =
1249212493
V->getType() != V1->getType()
12493-
? Idx + Sz
12494+
? Idx + VF
1249412495
: Mask[Idx] + cast<FixedVectorType>(V1->getType())
1249512496
->getNumElements();
1249612497
if (V->getType() != V1->getType())

llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ define i32 @foo() {
66
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4
9-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>, i32 [[D]], i32 1
9+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
1010
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
1111
; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
1212
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]

0 commit comments

Comments
 (0)