From fd6206227e3ce34fed224045acfd8104295d96e2 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 10 Dec 2025 11:44:55 +0000 Subject: [PATCH] [LV] Avoid SCEVChecks when IV update doesn't overflow We already check when IV update overflow checks are needed in different places in LV: consolidate them into a single routine, and re-use it to conditionally drop SCEVChecks in GeneratedRTChecks. --- .../Transforms/Vectorize/LoopVectorize.cpp | 148 +++--- .../AArch64/backedge-overflow.ll | 16 +- .../AArch64/conditional-branches-cost.ll | 183 +++----- .../AArch64/divs-with-scalable-vfs.ll | 84 +--- .../AArch64/epilog-vectorization-factors.ll | 11 +- .../epilog-vectorization-widen-inductions.ll | 53 ++- .../AArch64/induction-costs-sve.ll | 123 +---- .../LoopVectorize/AArch64/induction-costs.ll | 15 +- .../AArch64/low_trip_count_predicates.ll | 237 +++++++--- .../LoopVectorize/AArch64/predicated-costs.ll | 61 +-- .../AArch64/simple_early_exit.ll | 19 +- ...-narrow-interleave-to-widen-memory-cost.ll | 55 +-- .../PowerPC/optimal-epilog-vectorization.ll | 22 +- .../PowerPC/vplan-scalarivsext-crash.ll | 17 +- .../RISCV/gather-scatter-cost.ll | 14 +- .../RISCV/masked_gather_scatter.ll | 24 +- .../RISCV/riscv-vector-reverse.ll | 60 +-- .../LoopVectorize/RISCV/strided-accesses.ll | 79 +--- .../LoopVectorize/X86/cost-model.ll | 33 +- .../X86/divs-with-tail-folding.ll | 63 +-- .../X86/epilog-vectorization-inductions.ll | 12 +- .../illegal-parallel-loop-uniform-write.ll | 16 +- .../LoopVectorize/X86/interleave-cost.ll | 101 +--- .../LoopVectorize/X86/multi-exit-cost.ll | 16 +- .../Transforms/LoopVectorize/X86/pr35432.ll | 23 +- .../Transforms/LoopVectorize/X86/pr72969.ll | 22 +- .../X86/replicating-load-store-costs.ll | 5 +- .../X86/scev-checks-unprofitable.ll | 34 +- .../X86/x86_fp80-vector-store.ll | 12 +- .../LoopVectorize/branch-weights.ll | 16 +- ...log-vectorization-trunc-induction-steps.ll | 12 +- .../first-order-recurrence-complex.ll | 22 +- .../Transforms/LoopVectorize/induction.ll | 436 ++++-------------- ...aved-accesses-requiring-scev-predicates.ll | 45 +- .../LoopVectorize/load-deref-pred-align.ll | 19 +- ...o-fold-tail-by-masking-iv-external-uses.ll | 16 +- .../Transforms/LoopVectorize/opaque-ptr.ll | 25 +- .../optimal-epilog-vectorization.ll | 16 +- llvm/test/Transforms/LoopVectorize/optsize.ll | 38 +- .../LoopVectorize/pointer-induction.ll | 22 +- .../pr30654-phiscev-sext-trunc.ll | 86 +--- llvm/test/Transforms/LoopVectorize/pr34681.ll | 28 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 34 +- llvm/test/Transforms/LoopVectorize/pr45259.ll | 13 +- .../preserve-dbg-loc-and-loop-metadata.ll | 40 +- .../reduction-minmax-users-and-predicated.ll | 9 +- .../LoopVectorize/reverse_induction.ll | 30 +- .../LoopVectorize/runtime-check-known-true.ll | 13 +- .../runtime-check-small-clamped-bounds.ll | 30 +- .../runtime-checks-difference.ll | 17 +- .../scev-exit-phi-invalidation.ll | 43 +- .../LoopVectorize/scev-predicate-reasoning.ll | 40 +- .../LoopVectorize/single_early_exit.ll | 17 +- ...ive-path-inner-loop-with-runtime-checks.ll | 37 +- .../X86/SROA-after-final-loop-unrolling-2.ll | 82 ++-- 55 files changed, 830 insertions(+), 1914 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b3577e4ecbca8..d8349be22b4ad 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1766,6 +1766,64 @@ class LoopVectorizationCostModel { }; } // end namespace llvm +static std::optional getMaxVScale(const Function &F, + const TargetTransformInfo &TTI) { + if (std::optional MaxVScale = TTI.getMaxVScale()) + return MaxVScale; + + if (F.hasFnAttribute(Attribute::VScaleRange)) + return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax(); + + return std::nullopt; +} + +/// For the given VF and UF and maximum trip count computed for the loop, return +/// whether the induction variable might overflow in the vectorized loop. If +/// not, then we know a runtime overflow check always evaluates to false and can +/// be removed. +static bool +isIndvarOverflowCheckKnownFalse(const LoopVectorizationCostModel *Cost, + ElementCount VF, + std::optional UF = std::nullopt) { + // Always be conservative if we don't know the exact unroll factor. + unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF); + + IntegerType *IdxTy = Cost->Legal->getWidestInductionType(); + APInt MaxUIntTripCount = IdxTy->getMask(); + + // We know the runtime overflow check is known false iff the (max) trip-count + // is known and (max) trip-count + (VF * UF) does not overflow in the type of + // the vector loop induction variable. + if (unsigned TC = Cost->PSE.getSmallConstantMaxTripCount()) { + uint64_t MaxVF = VF.getKnownMinValue(); + if (VF.isScalable()) { + std::optional MaxVScale = + getMaxVScale(*Cost->TheFunction, Cost->TTI); + if (!MaxVScale) + return false; + MaxVF *= *MaxVScale; + } + + return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF); + } + + return false; +} + +/// Checks whether an IndVar overflow check is needed using +/// isIndvarOverflowCheckKnownFalse, with additional information about the +/// tail-folding style. +static bool isIndvarOverflowCheckNeeded(const LoopVectorizationCostModel &CM, + ElementCount VF, unsigned IC) { + // vscale is not necessarily a power-of-2, which means we cannot guarantee + // an overflow to zero when updating induction variables and so an + // additional overflow check is required before entering the vector loop. + return VF.isScalable() && !CM.TTI.isVScaleKnownToBeAPowerOfTwo() && + !isIndvarOverflowCheckKnownFalse(&CM, VF, IC) && + CM.getTailFoldingStyle() != + TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; +} + namespace { /// Helper struct to manage generating runtime checks for vectorization. /// @@ -1790,7 +1848,6 @@ class GeneratedRTChecks { DominatorTree *DT; LoopInfo *LI; - TargetTransformInfo *TTI; SCEVExpander SCEVExp; SCEVExpander MemCheckExp; @@ -1801,17 +1858,16 @@ class GeneratedRTChecks { PredicatedScalarEvolution &PSE; - /// The kind of cost that we are calculating - TTI::TargetCostKind CostKind; + /// The CostModel. + const LoopVectorizationCostModel &CM; public: GeneratedRTChecks(PredicatedScalarEvolution &PSE, DominatorTree *DT, - LoopInfo *LI, TargetTransformInfo *TTI, - TTI::TargetCostKind CostKind) - : DT(DT), LI(LI), TTI(TTI), + LoopInfo *LI, LoopVectorizationCostModel &CM) + : DT(DT), LI(LI), SCEVExp(*PSE.getSE(), "scev.check", /*PreserveLCSSA=*/false), MemCheckExp(*PSE.getSE(), "scev.check", /*PreserveLCSSA=*/false), - PSE(PSE), CostKind(CostKind) {} + PSE(PSE), CM(CM) {} /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can /// accurately estimate the cost of the runtime checks. The blocks are @@ -1833,11 +1889,18 @@ class GeneratedRTChecks { BasicBlock *LoopHeader = L->getHeader(); BasicBlock *Preheader = L->getLoopPreheader(); + // SCEVChecks are droppable when the UnionPred is always true, or when + // IndVar overflow checks are not needed, under the condition that we don't + // drop stride-versioning checks. + bool SCEVChecksAreDroppable = + UnionPred.isAlwaysTrue() || (!isIndvarOverflowCheckNeeded(CM, VF, IC) && + LAI.getSymbolicStrides().empty()); + // Use SplitBlock to create blocks for SCEV & memory runtime checks to // ensure the blocks are properly added to LoopInfo & DominatorTree. Those // may be used by SCEVExpander. The blocks will be un-linked from their // predecessors and removed from LI & DT at the end of the function. - if (!UnionPred.isAlwaysTrue()) { + if (!SCEVChecksAreDroppable) { SCEVCheckBlock = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI, nullptr, "vector.scevcheck"); @@ -1935,7 +1998,7 @@ class GeneratedRTChecks { for (Instruction &I : *SCEVCheckBlock) { if (SCEVCheckBlock->getTerminator() == &I) continue; - InstructionCost C = TTI->getInstructionCost(&I, CostKind); + InstructionCost C = CM.TTI.getInstructionCost(&I, CM.CostKind); LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); RTCheckCost += C; } @@ -1944,7 +2007,7 @@ class GeneratedRTChecks { for (Instruction &I : *MemCheckBlock) { if (MemCheckBlock->getTerminator() == &I) continue; - InstructionCost C = TTI->getInstructionCost(&I, CostKind); + InstructionCost C = CM.TTI.getInstructionCost(&I, CM.CostKind); LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); MemCheckCost += C; } @@ -2222,49 +2285,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue, llvm_unreachable("invalid enum"); } -static std::optional getMaxVScale(const Function &F, - const TargetTransformInfo &TTI) { - if (std::optional MaxVScale = TTI.getMaxVScale()) - return MaxVScale; - - if (F.hasFnAttribute(Attribute::VScaleRange)) - return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax(); - - return std::nullopt; -} - -/// For the given VF and UF and maximum trip count computed for the loop, return -/// whether the induction variable might overflow in the vectorized loop. If not, -/// then we know a runtime overflow check always evaluates to false and can be -/// removed. -static bool isIndvarOverflowCheckKnownFalse( - const LoopVectorizationCostModel *Cost, - ElementCount VF, std::optional UF = std::nullopt) { - // Always be conservative if we don't know the exact unroll factor. - unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF); - - IntegerType *IdxTy = Cost->Legal->getWidestInductionType(); - APInt MaxUIntTripCount = IdxTy->getMask(); - - // We know the runtime overflow check is known false iff the (max) trip-count - // is known and (max) trip-count + (VF * UF) does not overflow in the type of - // the vector loop induction variable. - if (unsigned TC = Cost->PSE.getSmallConstantMaxTripCount()) { - uint64_t MaxVF = VF.getKnownMinValue(); - if (VF.isScalable()) { - std::optional MaxVScale = - getMaxVScale(*Cost->TheFunction, Cost->TTI); - if (!MaxVScale) - return false; - MaxVF *= *MaxVScale; - } - - return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF); - } - - return false; -} - // Return whether we allow using masked interleave-groups (for dealing with // strided loads/stores that reside in predicated blocks, or for dealing // with gaps). @@ -2354,13 +2374,7 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck( // check is known to be true, or known to be false. CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check"); } // else step known to be < trip count, use CheckMinIters preset to false. - } else if (VF.isScalable() && !TTI->isVScaleKnownToBeAPowerOfTwo() && - !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) && - Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) { - // vscale is not necessarily a power-of-2, which means we cannot guarantee - // an overflow to zero when updating induction variables and so an - // additional overflow check is required before entering the vector loop. - + } else if (isIndvarOverflowCheckNeeded(*Cost, VF, UF)) { // Get the maximum unsigned value for the type. Value *MaxUIntTripCount = ConstantInt::get(CountTy, cast(CountTy)->getMask()); @@ -9122,14 +9136,6 @@ void LoopVectorizationPlanner::attachRuntimeChecks( void LoopVectorizationPlanner::addMinimumIterationCheck( VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const { - // vscale is not necessarily a power-of-2, which means we cannot guarantee - // an overflow to zero when updating induction variables and so an - // additional overflow check is required before entering the vector loop. - bool IsIndvarOverflowCheckNeededForVF = - VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() && - !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) && - CM.getTailFoldingStyle() != - TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; const uint32_t *BranchWeigths = hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()) ? &MinItersBypassWeights[0] @@ -9137,7 +9143,7 @@ void LoopVectorizationPlanner::addMinimumIterationCheck( VPlanTransforms::addMinimumIterationCheck( Plan, VF, UF, MinProfitableTripCount, CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(), - IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths, + isIndvarOverflowCheckNeeded(CM, VF, UF), OrigLoop, BranchWeigths, OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), *PSE.getSE()); } @@ -9249,7 +9255,7 @@ static bool processLoopInVPlanNativePath( VPlan &BestPlan = LVP.getPlanFor(VF.Width); { - GeneratedRTChecks Checks(PSE, DT, LI, TTI, CM.CostKind); + GeneratedRTChecks Checks(PSE, DT, LI, CM); InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM, Checks, BestPlan); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" @@ -10085,7 +10091,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (ORE->allowExtraAnalysis(LV_NAME)) LVP.emitInvalidCostRemarks(ORE); - GeneratedRTChecks Checks(PSE, DT, LI, TTI, CM.CostKind); + GeneratedRTChecks Checks(PSE, DT, LI, CM); if (LVP.hasPlanWithVF(VF.Width)) { // Select the interleave count. IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll index 4f7195934a790..24d2e54a6a0d3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -10,8 +10,8 @@ ; CHECK-LABEL: test_sge -; CHECK-LABEL: vector.scevcheck -; CHECK-LABEL: vector.body +; CHECK-NOT: vector.scevcheck +; CHECK: vector.body define void @test_sge(ptr noalias %A, ptr noalias %B, ptr noalias %C, i32 %N) { @@ -48,8 +48,8 @@ for.end: } ; CHECK-LABEL: test_uge -; CHECK-LABEL: vector.scevcheck -; CHECK-LABEL: vector.body +; CHECK-NOT: vector.scevcheck +; CHECK: vector.body define void @test_uge(ptr noalias %A, ptr noalias %B, ptr noalias %C, i32 %N, i32 %Offset) { @@ -88,8 +88,8 @@ for.end: } ; CHECK-LABEL: test_ule -; CHECK-LABEL: vector.scevcheck -; CHECK-LABEL: vector.body +; CHECK-NOT: vector.scevcheck +; CHECK: vector.body define void @test_ule(ptr noalias %A, ptr noalias %B, ptr noalias %C, i32 %N, @@ -127,8 +127,8 @@ for.end: } ; CHECK-LABEL: test_sle -; CHECK-LABEL: vector.scevcheck -; CHECK-LABEL: vector.body +; CHECK-NOT: vector.scevcheck +; CHECK: vector.body define void @test_sle(ptr noalias %A, ptr noalias %B, ptr noalias %C, i32 %N, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index b549a06f08f8c..75ac8c1de36a1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -658,38 +658,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; DEFAULT: [[VECTOR_SCEVCHECK]]: -; DEFAULT-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]] -; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]] -; DEFAULT-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4 -; DEFAULT-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; DEFAULT-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; DEFAULT-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] -; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]] -; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]] -; DEFAULT-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8 -; DEFAULT-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; DEFAULT-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 -; DEFAULT-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]] -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]] -; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]] -; DEFAULT-NEXT: [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]] -; DEFAULT-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]] -; DEFAULT-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE27:.*]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ] +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] +; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE20]] ] ; DEFAULT-NEXT: [[TMP15:%.*]] = load float, ptr [[SRC_1]], align 4 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer @@ -723,8 +699,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: ; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; DEFAULT: [[PRED_STORE_IF14]]: +; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; DEFAULT: [[PRED_STORE_IF7]]: ; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4 ; DEFAULT-NEXT: [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 @@ -735,11 +711,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4 ; DEFAULT-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; DEFAULT: [[PRED_STORE_CONTINUE15]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; DEFAULT: [[PRED_STORE_CONTINUE8]]: ; DEFAULT-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] -; DEFAULT: [[PRED_STORE_IF16]]: +; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; DEFAULT: [[PRED_STORE_IF9]]: ; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4 ; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 @@ -750,11 +726,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4 ; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; DEFAULT: [[PRED_STORE_CONTINUE17]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; DEFAULT: [[PRED_STORE_CONTINUE10]]: ; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; DEFAULT: [[PRED_STORE_IF18]]: +; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; DEFAULT: [[PRED_STORE_IF11]]: ; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4 ; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 @@ -765,11 +741,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4 ; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; DEFAULT: [[PRED_STORE_CONTINUE19]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; DEFAULT: [[PRED_STORE_CONTINUE12]]: ; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4 -; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] -; DEFAULT: [[PRED_STORE_IF20]]: +; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] +; DEFAULT: [[PRED_STORE_IF13]]: ; DEFAULT-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4 ; DEFAULT-NEXT: [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 @@ -780,11 +756,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4 ; DEFAULT-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; DEFAULT: [[PRED_STORE_CONTINUE21]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; DEFAULT: [[PRED_STORE_CONTINUE14]]: ; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5 -; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] -; DEFAULT: [[PRED_STORE_IF22]]: +; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] +; DEFAULT: [[PRED_STORE_IF15]]: ; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4 ; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 @@ -795,11 +771,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4 ; DEFAULT-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]] -; DEFAULT: [[PRED_STORE_CONTINUE23]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE16]] +; DEFAULT: [[PRED_STORE_CONTINUE16]]: ; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6 -; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] -; DEFAULT: [[PRED_STORE_IF24]]: +; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] +; DEFAULT: [[PRED_STORE_IF17]]: ; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4 ; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 @@ -810,11 +786,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4 ; DEFAULT-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]] -; DEFAULT: [[PRED_STORE_CONTINUE25]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE18]] +; DEFAULT: [[PRED_STORE_CONTINUE18]]: ; DEFAULT-NEXT: [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7 -; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]] -; DEFAULT: [[PRED_STORE_IF26]]: +; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]] +; DEFAULT: [[PRED_STORE_IF19]]: ; DEFAULT-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4 ; DEFAULT-NEXT: [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 @@ -825,8 +801,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4 ; DEFAULT-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]] -; DEFAULT: [[PRED_STORE_CONTINUE27]]: +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; DEFAULT: [[PRED_STORE_CONTINUE20]]: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -840,31 +816,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-SAME: ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[SRC_3:%.*]], ptr noalias [[SRC_4:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] { ; PRED-NEXT: [[ENTRY:.*:]] ; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: -; PRED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; PRED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]] -; PRED-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]] -; PRED-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4 -; PRED-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; PRED-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; PRED-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] -; PRED-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]] -; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]] -; PRED-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8 -; PRED-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; PRED-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 -; PRED-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 -; PRED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]] -; PRED-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]] -; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]] -; PRED-NEXT: [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]] -; PRED-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]] -; PRED-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP15:%.*]] = sub i64 [[TMP0]], 8 ; PRED-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[TMP0]], 8 @@ -872,9 +824,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE27:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ] +; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] +; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE20]] ] +; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE20]] ] ; PRED-NEXT: [[TMP18:%.*]] = load float, ptr [[SRC_1]], align 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer @@ -909,8 +861,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] ; PRED: [[PRED_STORE_CONTINUE]]: ; PRED-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1 -; PRED-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; PRED: [[PRED_STORE_IF14]]: +; PRED-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; PRED: [[PRED_STORE_IF7]]: ; PRED-NEXT: [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4 ; PRED-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 @@ -921,11 +873,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4 ; PRED-NEXT: [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; PRED: [[PRED_STORE_CONTINUE15]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; PRED: [[PRED_STORE_CONTINUE8]]: ; PRED-NEXT: [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2 -; PRED-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] -; PRED: [[PRED_STORE_IF16]]: +; PRED-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; PRED: [[PRED_STORE_IF9]]: ; PRED-NEXT: [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4 ; PRED-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 @@ -936,11 +888,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4 ; PRED-NEXT: [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; PRED: [[PRED_STORE_CONTINUE17]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; PRED: [[PRED_STORE_CONTINUE10]]: ; PRED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3 -; PRED-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; PRED: [[PRED_STORE_IF18]]: +; PRED-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; PRED: [[PRED_STORE_IF11]]: ; PRED-NEXT: [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4 ; PRED-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 @@ -951,11 +903,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4 ; PRED-NEXT: [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; PRED: [[PRED_STORE_CONTINUE19]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; PRED: [[PRED_STORE_CONTINUE12]]: ; PRED-NEXT: [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4 -; PRED-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] -; PRED: [[PRED_STORE_IF20]]: +; PRED-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] +; PRED: [[PRED_STORE_IF13]]: ; PRED-NEXT: [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4 ; PRED-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 @@ -966,11 +918,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4 ; PRED-NEXT: [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; PRED: [[PRED_STORE_CONTINUE21]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; PRED: [[PRED_STORE_CONTINUE14]]: ; PRED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5 -; PRED-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] -; PRED: [[PRED_STORE_IF22]]: +; PRED-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] +; PRED: [[PRED_STORE_IF15]]: ; PRED-NEXT: [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4 ; PRED-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 @@ -981,11 +933,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4 ; PRED-NEXT: [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE23]] -; PRED: [[PRED_STORE_CONTINUE23]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE16]] +; PRED: [[PRED_STORE_CONTINUE16]]: ; PRED-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6 -; PRED-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] -; PRED: [[PRED_STORE_IF24]]: +; PRED-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] +; PRED: [[PRED_STORE_IF17]]: ; PRED-NEXT: [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4 ; PRED-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 @@ -996,11 +948,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4 ; PRED-NEXT: [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE25]] -; PRED: [[PRED_STORE_CONTINUE25]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE18]] +; PRED: [[PRED_STORE_CONTINUE18]]: ; PRED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7 -; PRED-NEXT: br i1 [[TMP77]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]] -; PRED: [[PRED_STORE_IF26]]: +; PRED-NEXT: br i1 [[TMP77]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]] +; PRED: [[PRED_STORE_IF19]]: ; PRED-NEXT: [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4 ; PRED-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 @@ -1011,8 +963,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP82]], align 4 ; PRED-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]] -; PRED: [[PRED_STORE_CONTINUE27]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; PRED: [[PRED_STORE_CONTINUE20]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) ; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 @@ -1020,8 +972,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br [[EXIT:label %.*]] -; PRED: [[SCALAR_PH]]: +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[EXIT]]: +; PRED-NEXT: ret void ; entry: br label %loop.header @@ -1124,7 +1077,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[EXIT]]: @@ -1313,7 +1266,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; PRED-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true -; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br [[EXIT:label %.*]] ; PRED: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 72e813b62025f..15f910d7c96aa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -9,16 +9,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8) -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP3]], 4294967295 -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 @@ -48,7 +40,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -97,14 +89,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64 -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 @@ -148,28 +133,6 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[IV]], [[M]] -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[DIV18:%.*]] = sdiv i64 [[M]], [[CONV6]] -; CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[DIV18]] to i32 -; CHECK-NEXT: [[MUL30:%.*]] = mul i64 [[DIV18]], [[CONV61]] -; CHECK-NEXT: [[SUB31:%.*]] = sub i64 [[IV]], [[MUL30]] -; CHECK-NEXT: [[CONV34:%.*]] = trunc i64 [[SUB31]] to i32 -; CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[X]], [[CONV20]] -; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[MUL35]], [[CONV34]] -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD36]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[DST]], i64 [[IDXPROM]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -212,13 +175,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[MUL_1_I:%.*]] = mul i64 [[X]], [[X]] ; CHECK-NEXT: [[MUL_2_I:%.*]] = mul i64 [[MUL_1_I]], [[X]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[N]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 @@ -262,32 +219,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP39:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[DIV_I:%.*]] = udiv i64 [[IV]], [[MUL_2_I]] -; CHECK-NEXT: [[REM_I:%.*]] = urem i64 [[IV]], [[MUL_2_I]] -; CHECK-NEXT: [[DIV_1_I:%.*]] = udiv i64 [[REM_I]], [[MUL_1_I]] -; CHECK-NEXT: [[REM_1_I:%.*]] = urem i64 [[REM_I]], [[MUL_1_I]] -; CHECK-NEXT: [[DIV_2_I:%.*]] = udiv i64 [[REM_1_I]], [[X]] -; CHECK-NEXT: [[REM_2_I:%.*]] = urem i64 [[REM_1_I]], [[X]] -; CHECK-NEXT: [[MUL_I:%.*]] = mul i64 [[X]], [[DIV_I]] -; CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], [[DIV_1_I]] -; CHECK-NEXT: [[MUL_1_I9:%.*]] = mul i64 [[ADD_I]], [[X]] -; CHECK-NEXT: [[ADD_1_I:%.*]] = add i64 [[MUL_1_I9]], [[DIV_2_I]] -; CHECK-NEXT: [[MUL_2_I11:%.*]] = mul i64 [[ADD_1_I]], [[X]] -; CHECK-NEXT: [[ADD_2_I:%.*]] = add i64 [[MUL_2_I11]], [[REM_2_I]] -; CHECK-NEXT: [[SEXT_I:%.*]] = shl i64 [[ADD_2_I]], 32 -; CHECK-NEXT: [[CONV6_I:%.*]] = ashr i64 [[SEXT_I]], 32 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[CONV6_I]] -; CHECK-NEXT: store i64 [[DIV_I]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -326,9 +260,7 @@ exit: ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index 549df337e6907..8f17f400fc2f2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -430,13 +430,6 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[ADD]] to i2 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[X]] to i2 -; CHECK-NEXT: [[TMP5:%.*]] = sub i2 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i64 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -466,7 +459,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF1]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC2]], 4 @@ -484,7 +477,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[VECTOR_SCEVCHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 85726c161cc54..9c2e1ec3815e0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -355,17 +355,62 @@ exit: define void @test_widen_extended_induction(ptr %dst) { ; CHECK-LABEL: @test_widen_extended_induction( -; CHECK-NEXT: entry: +; CHECK-NEXT: iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST:%.*]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 2 +; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP2]], align 1 +; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], {{!llvm.loop ![0-9]+}} +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[BROADCAST_SPLAT]], +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX1]] to i8 +; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[OFFSET_IDX]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST]], i64 0, i64 [[TMP5]] +; CHECK-NEXT: store <2 x i8> [[VEC_IND2]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND2]], splat (i8 2) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 10000 +; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP1]] ] ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[ARRAYIDX1449:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST:%.*]], i64 0, i64 [[IV_EXT]] +; CHECK-NEXT: [[ARRAYIDX1449:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST]], i64 0, i64 [[IV_EXT]] ; CHECK-NEXT: store i8 [[IV]], ptr [[ARRAYIDX1449]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[IV_NEXT_EXT:%.*]] = zext i8 [[IV_NEXT]] to i32 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT_EXT]], 10000 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]], {{!llvm.loop ![0-9]+}} ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4b097ba2422e4..bafb65dab90a2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -213,24 +213,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1 ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; DEFAULT: [[VECTOR_SCEVCHECK]]: -; DEFAULT-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] -; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_X]], 0 -; DEFAULT-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[MUL_X]] -; DEFAULT-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32 -; DEFAULT-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]]) -; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]] -; DEFAULT-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0 -; DEFAULT-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false -; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; DEFAULT-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295 -; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ne i32 [[MUL_X]], 0 -; DEFAULT-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] @@ -254,7 +237,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] @@ -314,25 +297,7 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[X]] ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; DEFAULT: [[VECTOR_SCEVCHECK]]: -; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[X]], [[X]] -; DEFAULT-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; DEFAULT-NEXT: [[TMP3:%.*]] = icmp slt i32 [[MUL]], 0 -; DEFAULT-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[MUL]] -; DEFAULT-NEXT: [[TMP5:%.*]] = trunc i64 [[N]] to i32 -; DEFAULT-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP4]], i32 [[TMP5]]) -; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; DEFAULT-NEXT: [[TMP6:%.*]] = sub i32 0, [[MUL_RESULT]] -; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], 0 -; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 false -; DEFAULT-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], 4294967295 -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp ne i32 [[MUL]], 0 -; DEFAULT-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] -; DEFAULT-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] -; DEFAULT-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] @@ -359,12 +324,12 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[LOOP:.*]] ; DEFAULT: [[LOOP]]: ; DEFAULT-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; DEFAULT-NEXT: [[IV_1_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 ; DEFAULT-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[MUL]], [[IV_1_TRUNC]] ; DEFAULT-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 @@ -425,24 +390,7 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[ADD:%.*]] = add i32 [[X]], 1 ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; DEFAULT: [[VECTOR_SCEVCHECK]]: -; DEFAULT-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] -; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[ADD]], 0 -; DEFAULT-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[ADD]] -; DEFAULT-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32 -; DEFAULT-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]]) -; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]] -; DEFAULT-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0 -; DEFAULT-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false -; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; DEFAULT-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295 -; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ne i32 [[ADD]], 0 -; DEFAULT-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] @@ -469,8 +417,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[LOOP:.*]] ; DEFAULT: [[LOOP]]: ; DEFAULT-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] @@ -535,16 +483,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; DEFAULT-NEXT: [[ENTRY:.*]]: ; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; DEFAULT: [[VECTOR_SCEVCHECK]]: -; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; DEFAULT-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; DEFAULT-NEXT: [[TMP2:%.*]] = add i32 1, [[TMP1]] -; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1 -; DEFAULT-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; DEFAULT-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; DEFAULT-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -564,12 +503,12 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[LOOP:.*]] ; DEFAULT: [[LOOP]]: ; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ] +; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ] ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2 ; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 8 ; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 @@ -582,27 +521,18 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-LABEL: define void @exit_cond_zext_iv( ; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { ; PRED-NEXT: [[ENTRY:.*:]] -; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: ; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; PRED-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; PRED-NEXT: [[TMP2:%.*]] = add i32 1, [[TMP1]] -; PRED-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1 -; PRED-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; PRED-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; PRED-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1 +; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 1 ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1 +; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ] +; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer ; PRED-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT3]], @@ -616,29 +546,18 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] ; PRED: [[PRED_STORE_CONTINUE]]: ; PRED-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 -; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]] -; PRED: [[PRED_STORE_IF4]]: +; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]] +; PRED: [[PRED_STORE_IF3]]: ; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; PRED-NEXT: [[TMP12:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP11]], i32 2 ; PRED-NEXT: store i32 0, ptr [[TMP12]], align 8 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]] -; PRED: [[PRED_STORE_CONTINUE5]]: +; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]] +; PRED: [[PRED_STORE_CONTINUE4]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: -; PRED-NEXT: br label %[[LOOP:.*]] -; PRED: [[LOOP]]: -; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2 -; PRED-NEXT: store i32 0, ptr [[GEP]], align 8 -; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 -; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64 -; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]] -; PRED-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 7b42e565e127d..2840e4de10164 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -356,16 +356,7 @@ define void @zext_iv_increment(ptr %dst, i64 %N) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -385,8 +376,8 @@ define void @zext_iv_increment(ptr %dst, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index cf45f3a88f37e..d020d33e41585 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -26,8 +26,7 @@ target triple = "aarch64-unknown-linux-gnu" ; DEBUG: LV: Clamping the MaxVF to maximum power of two not exceeding the constant trip count: 16 ; DEBUG: LV: IC is 1 ; DEBUG: LV: VF is 16 -; DEBUG: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (16 < 32) -; DEBUG: LV: Too many memory checks needed. +; DEBUG: Executing best plan with VF=16, UF=1 ; DEBUG-LABEL: LV: Checking a loop in 'overflow_indvar_known_false' ; DEBUG: LV: Found trip count: 0 @@ -49,17 +48,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-VS1-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]] ; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8 -; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK-VS1: [[VECTOR_SCEVCHECK]]: -; CHECK-VS1-NEXT: [[TMP6:%.*]] = add i32 [[TC]], 1 -; CHECK-VS1-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-VS1-NEXT: [[TMP8:%.*]] = sub i64 19, [[TMP7]] -; CHECK-VS1-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 -; CHECK-VS1-NEXT: [[TMP10:%.*]] = add i32 [[TMP6]], [[TMP9]] -; CHECK-VS1-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP6]] -; CHECK-VS1-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 4294967295 -; CHECK-VS1-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]] -; CHECK-VS1-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK-VS1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-VS1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 4 @@ -112,7 +101,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS1-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS1: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ] +; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ] ; CHECK-VS1-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS1: [[WHILE_BODY]]: ; CHECK-VS1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -142,17 +131,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-VS2-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]] ; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8 -; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK-VS2: [[VECTOR_SCEVCHECK]]: -; CHECK-VS2-NEXT: [[TMP6:%.*]] = add i32 [[TC]], 1 -; CHECK-VS2-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-VS2-NEXT: [[TMP8:%.*]] = sub i64 19, [[TMP7]] -; CHECK-VS2-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 -; CHECK-VS2-NEXT: [[TMP10:%.*]] = add i32 [[TMP6]], [[TMP9]] -; CHECK-VS2-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP6]] -; CHECK-VS2-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 4294967295 -; CHECK-VS2-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]] -; CHECK-VS2-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK-VS2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-VS2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS2-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3 @@ -205,7 +184,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS2-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS2: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ] +; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ] ; CHECK-VS2-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS2: [[WHILE_BODY]]: ; CHECK-VS2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -303,31 +282,139 @@ define void @too_many_runtime_checks(ptr nocapture noundef %p, ptr nocapture nou ; CHECK-SAME: ptr noundef captures(none) [[P:%.*]], ptr noundef captures(none) [[P1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], ptr noundef readonly captures(none) [[P3:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TC]], 16 -; CHECK-NEXT: br i1 [[CMP20]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]] -; CHECK: [[WHILE_PREHEADER]]: +; CHECK-NEXT: br i1 [[CMP20]], label %[[ITER_CHECK:.*]], label %[[WHILE_END:.*]] +; CHECK: [[ITER_CHECK]]: ; CHECK-NEXT: [[CONV8:%.*]] = trunc i16 [[VAL]] to i8 ; CHECK-NEXT: [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4 ; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TC]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TC]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 17, [[TMP3]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 17 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TC]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP1]], 21 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP8]] +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[P3]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[P3]], i64 [[TMP8]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP5]] +; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[SCEVGEP4]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT10]] +; CHECK-NEXT: [[BOUND011:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP7]] +; CHECK-NEXT: [[BOUND112:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]] +; CHECK-NEXT: [[CONFLICT_RDX14:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT13]] +; CHECK-NEXT: [[BOUND015:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP5]] +; CHECK-NEXT: [[BOUND116:%.*]] = icmp ult ptr [[SCEVGEP4]], [[SCEVGEP3]] +; CHECK-NEXT: [[FOUND_CONFLICT17:%.*]] = and i1 [[BOUND015]], [[BOUND116]] +; CHECK-NEXT: [[CONFLICT_RDX18:%.*]] = or i1 [[CONFLICT_RDX14]], [[FOUND_CONFLICT17]] +; CHECK-NEXT: [[BOUND019:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP7]] +; CHECK-NEXT: [[BOUND120:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP3]] +; CHECK-NEXT: [[FOUND_CONFLICT21:%.*]] = and i1 [[BOUND019]], [[BOUND120]] +; CHECK-NEXT: [[CONFLICT_RDX22:%.*]] = or i1 [[CONFLICT_RDX18]], [[FOUND_CONFLICT21]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX22]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK23:%.*]] = icmp ult i64 [[TMP4]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK23]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[CONV8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = add i64 [[TMP1]], [[INDEX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 1, !alias.scope [[META6:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1, !alias.scope [[META9:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[WIDE_LOAD24]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD25]] +; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP16]], align 1, !alias.scope [[META11]], !noalias [[META13]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = add <16 x i8> [[WIDE_LOAD26]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <16 x i8> [[TMP19]], ptr [[TMP18]], align 1, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF18:![0-9]+]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF27:%.*]] = urem i64 [[TMP4]], 4 +; CHECK-NEXT: [[N_VEC28:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF27]] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP1]], [[N_VEC28]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <4 x i8> poison, i8 [[CONV8]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT29]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX31:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT36:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP1]], [[INDEX31]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1, !alias.scope [[META6]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x i8>, ptr [[TMP23]], align 1, !alias.scope [[META9]] +; CHECK-NEXT: [[TMP24:%.*]] = mul <4 x i8> [[WIDE_LOAD33]], [[WIDE_LOAD32]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x i8>, ptr [[TMP25]], align 1, !alias.scope [[META11]], !noalias [[META13]] +; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i8> [[TMP24]], [[WIDE_LOAD34]] +; CHECK-NEXT: store <4 x i8> [[TMP26]], ptr [[TMP25]], align 1, !alias.scope [[META11]], !noalias [[META13]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: [[TMP28:%.*]] = add <4 x i8> [[WIDE_LOAD35]], [[BROADCAST_SPLAT30]] +; CHECK-NEXT: store <4 x i8> [[TMP28]], ptr [[TMP27]], align 1, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: [[INDEX_NEXT36]] = add nuw i64 [[INDEX31]], 4 +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT36]], [[N_VEC28]] +; CHECK-NEXT: br i1 [[TMP29]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N37:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC28]] +; CHECK-NEXT: br i1 [[CMP_N37]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP21]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP1]], %[[VECTOR_MEMCHECK]] ], [ [[TMP1]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP1]], %[[WHILE_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP60:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[IV]] +; CHECK-NEXT: [[TMP60:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IV]] ; CHECK-NEXT: [[TMP61:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP61]], [[TMP60]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IV]] ; CHECK-NEXT: [[TMP62:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP62]] ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[IV]] ; CHECK-NEXT: [[TMP63:%.*]] = load i8, ptr [[ARRAYIDX10]], align 1 ; CHECK-NEXT: [[ADD12:%.*]] = add i8 [[TMP63]], [[CONV8]] ; CHECK-NEXT: store i8 [[ADD12]], ptr [[ARRAYIDX10]], align 1 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[TMP64:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP64]], 16 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[WHILE_END_LOOPEXIT]]: ; CHECK-NEXT: br label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: @@ -380,17 +467,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TC]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1028, [[TMP20]] -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TC]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -; CHECK-NEXT: [[TMP23:%.*]] = sub i64 1027, [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP21]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp ult i32 [[TMP25]], [[TMP21]] -; CHECK-NEXT: [[TMP27:%.*]] = icmp ugt i64 [[TMP23]], 4294967295 -; CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]] -; CHECK-NEXT: br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 @@ -410,21 +487,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]]) ; CHECK-NEXT: [[TMP30:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP31:%.*]] = xor i1 [[TMP30]], true -; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP18]], [[CONV]] -; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP29]], 1027 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[WHILE_END_LOOPEXIT]]: ; CHECK-NEXT: br label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: @@ -470,7 +535,7 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -509,7 +574,7 @@ define i32 @tc4_from_profile(ptr noundef readonly captures(none) %tmp, i64 %N) v ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF23:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] @@ -543,20 +608,48 @@ exit: ; preds = %for.body ; CHECK-VS1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK-VS1: [[PROF3]] = !{!"branch_weights", i32 8, i32 8} ; CHECK-VS1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} -; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30} +; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-VS1: [[META6]] = !{[[META7:![0-9]+]]} +; CHECK-VS1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} +; CHECK-VS1: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; CHECK-VS1: [[META9]] = !{[[META10:![0-9]+]]} +; CHECK-VS1: [[META10]] = distinct !{[[META10]], [[META8]]} +; CHECK-VS1: [[META11]] = !{[[META12:![0-9]+]]} +; CHECK-VS1: [[META12]] = distinct !{[[META12]], [[META8]]} +; CHECK-VS1: [[META13]] = !{[[META14:![0-9]+]], [[META7]], [[META10]]} +; CHECK-VS1: [[META14]] = distinct !{[[META14]], [[META8]]} +; CHECK-VS1: [[META15]] = !{[[META14]]} +; CHECK-VS1: [[META16]] = !{[[META7]], [[META10]]} +; CHECK-VS1: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} +; CHECK-VS1: [[PROF18]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-VS1: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} +; CHECK-VS1: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]]} +; CHECK-VS1: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} +; CHECK-VS1: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]} +; CHECK-VS1: [[PROF23]] = !{!"branch_weights", i32 10, i32 30} ;. ; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-VS2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK-VS2: [[PROF3]] = !{!"branch_weights", i32 8, i32 8} ; CHECK-VS2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} -; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30} +; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-VS2: [[META6]] = !{[[META7:![0-9]+]]} +; CHECK-VS2: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} +; CHECK-VS2: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; CHECK-VS2: [[META9]] = !{[[META10:![0-9]+]]} +; CHECK-VS2: [[META10]] = distinct !{[[META10]], [[META8]]} +; CHECK-VS2: [[META11]] = !{[[META12:![0-9]+]]} +; CHECK-VS2: [[META12]] = distinct !{[[META12]], [[META8]]} +; CHECK-VS2: [[META13]] = !{[[META14:![0-9]+]], [[META7]], [[META10]]} +; CHECK-VS2: [[META14]] = distinct !{[[META14]], [[META8]]} +; CHECK-VS2: [[META15]] = !{[[META14]]} +; CHECK-VS2: [[META16]] = !{[[META7]], [[META10]]} +; CHECK-VS2: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} +; CHECK-VS2: [[PROF18]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-VS2: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} +; CHECK-VS2: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]]} +; CHECK-VS2: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} +; CHECK-VS2: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]} +; CHECK-VS2: [[PROF23]] = !{!"branch_weights", i32 10, i32 30} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll index 14398eebc6674..25d917ff849f1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll @@ -15,31 +15,7 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX16]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4) -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 4, i8 [[TMP5]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = add i8 4, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i8 [[TMP6]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[TMP4]], 255 -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[OFF]], 3 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 512, i64 [[TMP12]]) -; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST_2]], i64 1 ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 1 @@ -64,14 +40,14 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 ; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP5]] ; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] ; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 [[TMP2]]) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE22:.*]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE17]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE17]] ] ; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer @@ -88,35 +64,35 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] -; CHECK: [[PRED_STORE_IF17]]: +; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] +; CHECK: [[PRED_STORE_IF12]]: ; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1 ; CHECK-NEXT: [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]] ; CHECK-NEXT: [[TMP110:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1 ; CHECK-NEXT: [[TMP111:%.*]] = or i64 [[TMP110]], 1 ; CHECK-NEXT: store i64 [[TMP111]], ptr [[TMP109]], align 8, !alias.scope [[META3]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]] -; CHECK: [[PRED_STORE_CONTINUE18]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] +; CHECK: [[PRED_STORE_CONTINUE13]]: ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] -; CHECK: [[PRED_STORE_IF19]]: +; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] +; CHECK: [[PRED_STORE_IF14]]: ; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2 ; CHECK-NEXT: [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]] ; CHECK-NEXT: [[TMP116:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2 ; CHECK-NEXT: [[TMP117:%.*]] = or i64 [[TMP116]], 1 ; CHECK-NEXT: store i64 [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META3]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] -; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] +; CHECK: [[PRED_STORE_CONTINUE15]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] +; CHECK: [[PRED_STORE_IF16]]: ; CHECK-NEXT: [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3 ; CHECK-NEXT: [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]] ; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3 ; CHECK-NEXT: [[TMP123:%.*]] = or i64 [[TMP122]], 1 ; CHECK-NEXT: store i64 [[TMP123]], ptr [[TMP121]], align 8, !alias.scope [[META3]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] +; CHECK: [[PRED_STORE_CONTINUE17]]: ; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]]) @@ -127,10 +103,9 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[SRC]], align 1 ; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = or i64 [[L_EXT]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 66211bd0353d4..2ce0eed8dca5c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -386,17 +386,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10 ; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64 ; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1) -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255 -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[UMAX1]], 4 +; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -423,8 +414,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[FOUND:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -538,5 +529,5 @@ attributes #1 = { "target-features"="+sve" vscale_range(1,16) } ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll index 6eb8242bf7975..eb2147a2b42a3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll @@ -148,23 +148,7 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d ; CHECK-NEXT: [[ITER_CHECK:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]] -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[TMP0]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] @@ -235,7 +219,7 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d ; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -278,38 +262,7 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[ITER_CHECK:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 8 -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[B]] -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]] -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[C]], i64 8 -; CHECK-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; CHECK-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]] -; CHECK-NEXT: [[MUL8:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) -; CHECK-NEXT: [[MUL_RESULT9:%.*]] = extractvalue { i64, i1 } [[MUL8]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW10:%.*]] = extractvalue { i64, i1 } [[MUL8]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[C]], i64 [[MUL_RESULT9]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[C]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW10]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]] -; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK11:%.*]] = icmp ult i64 [[TMP0]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK11]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] @@ -390,7 +343,7 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[CMP_N26:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC23]] ; CHECK-NEXT: br i1 [[CMP_N26]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index f1fbf1dd5d942..46461ecfeb5bc 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -237,16 +237,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[ITER_CHECK:.*:]] ; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2 -; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; VF-TWO-CHECK: [[VECTOR_SCEVCHECK]]: -; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]] -; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]] -; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; VF-TWO-CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] @@ -360,16 +351,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[ITER_CHECK:.*:]] ; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 -; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; VF-FOUR-CHECK: [[VECTOR_SCEVCHECK]]: -; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]] -; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]] -; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]] -; VF-FOUR-CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; VF-FOUR-CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll index 34b8deaa8de03..22516009d86e2 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll @@ -16,16 +16,7 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX1]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP3]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 @@ -49,12 +40,12 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi double [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ [[SUM_0]], %[[ENTRY]] ], [ [[SUM_0]], %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi double [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ [[SUM_0]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], %[[LOOP_BODY:.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_BODY]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP_BODY]] ; CHECK: [[LOOP_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 36ebd422b5d7b..f5140766bc68d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -11,22 +11,13 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1 ; CHECK-NEXT: br label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[IBOX]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[NBRBOXES:%.*]], i64 4 ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[BOXES:%.*]], i64 4 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[NBRBOXES]], [[SCEVGEP1]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[BOXES]], [[SCEVGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i1 [[COND:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer @@ -48,10 +39,9 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP_THEN:%.*]], label [[LOOP_ELSE:%.*]] ; CHECK: loop.then: ; CHECK-NEXT: br label [[LOOP_LATCH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index f49f9284a1e93..210e2074138a0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -17,25 +17,10 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-LABEL: @foo4( ; RV32-NEXT: entry: ; RV32-NEXT: br label [[VECTOR_MEMCHECK:%.*]] -; RV32: vector.scevcheck: -; RV32-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 128, i32 624) -; RV32-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; RV32-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; RV32-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[MUL_RESULT]] -; RV32-NEXT: [[TMP1:%.*]] = icmp ult ptr [[TMP0]], [[A]] -; RV32-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]] -; RV32-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 256, i32 624) -; RV32-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; RV32-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; RV32-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[MUL_RESULT2]] -; RV32-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[B]] -; RV32-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW3]] -; RV32-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[TMP5]] -; RV32-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]] ; RV32: vector.memcheck: ; RV32-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i32 39940 -; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i32 79880 -; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i32 159752 +; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 79880 +; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 159752 ; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] ; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] ; RV32-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] @@ -43,7 +28,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] ; RV32-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] ; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] -; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: ; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV32-NEXT: [[TMP9:%.*]] = mul nuw nsw [[TMP7]], splat (i64 16) @@ -74,10 +59,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32: middle.block: ; RV32-NEXT: br label [[FOR_END:%.*]] ; RV32: scalar.ph: -; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ] ; RV32-NEXT: br label [[FOR_BODY:%.*]] ; RV32: for.body: -; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] ; RV32-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index f7340fee47eb8..6a14592fb37bd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -186,23 +186,14 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] ; RV64: [[FOR_BODY_PREHEADER]]: ; RV64-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; RV64-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; RV64: [[VECTOR_SCEVCHECK]]: -; RV64-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1 -; RV64-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1 -; RV64-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32 -; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]] -; RV64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]] -; RV64-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295 -; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]] -; RV64-NEXT: br i1 [[TMP8]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; RV64-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; RV64: [[VECTOR_MEMCHECK]]: ; RV64-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; RV64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; RV64-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]] ; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] -; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64: [[VECTOR_BODY]]: @@ -240,8 +231,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV64: [[SCALAR_PH]]: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] -; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] ; RV64-NEXT: br label %[[FOR_BODY:.*]] ; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP]] @@ -320,16 +309,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; RV64-UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] -; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; RV64-UF2: [[VECTOR_SCEVCHECK]]: -; RV64-UF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1 -; RV64-UF2-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1 -; RV64-UF2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32 -; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]] -; RV64-UF2-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]] -; RV64-UF2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295 -; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]] -; RV64-UF2-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; RV64-UF2: [[VECTOR_MEMCHECK]]: ; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; RV64-UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 @@ -392,8 +372,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[TMP20]] ; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; RV64-UF2: [[SCALAR_PH]]: -; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] -; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] +; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] +; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] ; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] ; RV64-UF2: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; RV64-UF2-NEXT: br label %[[FOR_COND_CLEANUP]] @@ -437,23 +417,14 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] ; RV64: [[FOR_BODY_PREHEADER]]: ; RV64-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; RV64-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; RV64: [[VECTOR_SCEVCHECK]]: -; RV64-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1 -; RV64-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1 -; RV64-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32 -; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]] -; RV64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]] -; RV64-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295 -; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]] -; RV64-NEXT: br i1 [[TMP8]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; RV64-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; RV64: [[VECTOR_MEMCHECK]]: ; RV64-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; RV64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; RV64-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]] ; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] -; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] ; RV64: [[VECTOR_BODY]]: @@ -491,8 +462,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV64: [[SCALAR_PH]]: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] -; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] ; RV64-NEXT: br label %[[FOR_BODY:.*]] ; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP]] @@ -571,16 +540,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; RV64-UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] -; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; RV64-UF2: [[VECTOR_SCEVCHECK]]: -; RV64-UF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1 -; RV64-UF2-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1 -; RV64-UF2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32 -; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]] -; RV64-UF2-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]] -; RV64-UF2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295 -; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]] -; RV64-UF2-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; RV64-UF2: [[VECTOR_MEMCHECK]]: ; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; RV64-UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 @@ -643,8 +603,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[TMP20]] ; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; RV64-UF2: [[SCALAR_PH]]: -; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] -; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] +; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] +; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] ; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] ; RV64-UF2: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; RV64-UF2-NEXT: br label %[[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 96e1ebb48b02c..a978075431e81 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -727,44 +727,15 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-LABEL: @double_stride_int_scaled( ; STRIDED-NEXT: entry: ; STRIDED-NEXT: br label [[VECTOR_SCEVCHECK:%.*]] -; STRIDED: vector.scevcheck: -; STRIDED-NEXT: [[TMP24:%.*]] = shl i64 [[STRIDE:%.*]], 2 -; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], -4 -; STRIDED-NEXT: [[TMP26:%.*]] = icmp slt i64 [[TMP24]], 0 -; STRIDED-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 [[TMP24]] -; STRIDED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP27]], i64 1023) -; STRIDED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; STRIDED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; STRIDED-NEXT: [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT]] -; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]] -; STRIDED-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP28]] -; STRIDED-NEXT: [[TMP31:%.*]] = icmp ult ptr [[TMP29]], [[P2]] -; STRIDED-NEXT: [[TMP32:%.*]] = icmp ugt ptr [[TMP30]], [[P2]] -; STRIDED-NEXT: [[TMP33:%.*]] = select i1 [[TMP26]], i1 [[TMP32]], i1 [[TMP31]] -; STRIDED-NEXT: [[TMP13:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW]] -; STRIDED-NEXT: [[TMP34:%.*]] = icmp slt i64 [[TMP24]], 0 -; STRIDED-NEXT: [[TMP15:%.*]] = select i1 [[TMP34]], i64 [[TMP25]], i64 [[TMP24]] -; STRIDED-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP15]], i64 1023) -; STRIDED-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; STRIDED-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; STRIDED-NEXT: [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT2]] -; STRIDED-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]] -; STRIDED-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP16]] -; STRIDED-NEXT: [[TMP37:%.*]] = icmp ult ptr [[TMP35]], [[P]] -; STRIDED-NEXT: [[TMP38:%.*]] = icmp ugt ptr [[TMP36]], [[P]] -; STRIDED-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i1 [[TMP38]], i1 [[TMP37]] -; STRIDED-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[MUL_OVERFLOW3]] -; STRIDED-NEXT: [[TMP23:%.*]] = or i1 [[TMP13]], [[TMP40]] -; STRIDED-NEXT: br i1 [[TMP23]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]] ; STRIDED: vector.memcheck: -; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE]], 4092 -; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]] +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092 +; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]] ; STRIDED-NEXT: [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]] ; STRIDED-NEXT: [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]] ; STRIDED-NEXT: [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]] ; STRIDED-NEXT: [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]] ; STRIDED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4 -; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]] +; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]] ; STRIDED-NEXT: [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]] ; STRIDED-NEXT: [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]] ; STRIDED-NEXT: [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]] @@ -773,7 +744,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP5]] ; STRIDED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UMIN3]], [[SCEVGEP1]] ; STRIDED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer @@ -801,10 +772,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED: middle.block: ; STRIDED-NEXT: br label [[EXIT:%.*]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ] ; STRIDED-NEXT: br label [[LOOP:%.*]] ; STRIDED: loop: -; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; STRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]] ; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]] ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 @@ -821,47 +791,18 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2-NEXT: entry: ; STRIDED-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; STRIDED-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; STRIDED-UF2-NEXT: [[UMAX9:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 79) +; STRIDED-UF2-NEXT: [[UMAX9:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 24) ; STRIDED-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[UMAX9]] ; STRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; STRIDED-UF2: vector.scevcheck: -; STRIDED-UF2-NEXT: [[TMP2:%.*]] = shl i64 [[STRIDE:%.*]], 2 -; STRIDED-UF2-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE]], -4 -; STRIDED-UF2-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP2]], 0 -; STRIDED-UF2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[TMP2]] -; STRIDED-UF2-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP5]], i64 1023) -; STRIDED-UF2-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; STRIDED-UF2-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; STRIDED-UF2-NEXT: [[TMP6:%.*]] = sub i64 0, [[MUL_RESULT]] -; STRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]] -; STRIDED-UF2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP6]] -; STRIDED-UF2-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP7]], [[P2]] -; STRIDED-UF2-NEXT: [[TMP10:%.*]] = icmp ugt ptr [[TMP8]], [[P2]] -; STRIDED-UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP4]], i1 [[TMP10]], i1 [[TMP9]] -; STRIDED-UF2-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; STRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP2]], 0 -; STRIDED-UF2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP3]], i64 [[TMP2]] -; STRIDED-UF2-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP14]], i64 1023) -; STRIDED-UF2-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; STRIDED-UF2-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; STRIDED-UF2-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT2]] -; STRIDED-UF2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]] -; STRIDED-UF2-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP15]] -; STRIDED-UF2-NEXT: [[TMP18:%.*]] = icmp ult ptr [[TMP16]], [[P]] -; STRIDED-UF2-NEXT: [[TMP19:%.*]] = icmp ugt ptr [[TMP17]], [[P]] -; STRIDED-UF2-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], i1 [[TMP19]], i1 [[TMP18]] -; STRIDED-UF2-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW3]] -; STRIDED-UF2-NEXT: [[TMP22:%.*]] = or i1 [[TMP12]], [[TMP21]] -; STRIDED-UF2-NEXT: br i1 [[TMP22]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; STRIDED-UF2: vector.memcheck: -; STRIDED-UF2-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE]], 4092 -; STRIDED-UF2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP23]] +; STRIDED-UF2-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE:%.*]], 4092 +; STRIDED-UF2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP23]] ; STRIDED-UF2-NEXT: [[TMP24:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]] ; STRIDED-UF2-NEXT: [[UMIN:%.*]] = select i1 [[TMP24]], ptr [[P2]], ptr [[SCEVGEP]] ; STRIDED-UF2-NEXT: [[TMP25:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]] ; STRIDED-UF2-NEXT: [[UMAX:%.*]] = select i1 [[TMP25]], ptr [[P2]], ptr [[SCEVGEP]] ; STRIDED-UF2-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4 -; STRIDED-UF2-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP23]] +; STRIDED-UF2-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP23]] ; STRIDED-UF2-NEXT: [[TMP26:%.*]] = icmp ult ptr [[P]], [[SCEVGEP5]] ; STRIDED-UF2-NEXT: [[UMIN6:%.*]] = select i1 [[TMP26]], ptr [[P]], ptr [[SCEVGEP5]] ; STRIDED-UF2-NEXT: [[TMP27:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP5]] @@ -909,7 +850,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; STRIDED-UF2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; STRIDED-UF2: scalar.ph: -; STRIDED-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; STRIDED-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; STRIDED-UF2-NEXT: br label [[LOOP:%.*]] ; STRIDED-UF2: loop: ; STRIDED-UF2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 801f910c5e13d..9e0f6bb20feb6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -334,24 +334,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] ; CHECK-NEXT: [[UMIN10:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]]) ; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN10]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 24 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1) -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]] -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]]) -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 1, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMIN]], 4294967295 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[UMIN]] to i32 -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[UMIN]], 4294967295 -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP9]], [[TMP13]] -; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ule i64 [[TMP2]], 12 +; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1 ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8 @@ -398,8 +382,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ] @@ -822,10 +806,7 @@ define i32 @g(i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[N:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N]], 4294967295 -; CHECK-NEXT: br i1 [[TMP2]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP1]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -904,8 +885,8 @@ define i32 @g(i64 %n) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i32 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i32 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL17]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index 62fda4e0c2098..613bd0ab290a1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -6,14 +6,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64 -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 @@ -43,23 +36,6 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[DIV18:%.*]] = sdiv i64 [[M]], [[CONV6]] -; CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[DIV18]] to i32 -; CHECK-NEXT: [[MUL30:%.*]] = mul i64 [[DIV18]], [[CONV61]] -; CHECK-NEXT: [[SUB31:%.*]] = sub i64 [[IV]], [[MUL30]] -; CHECK-NEXT: [[CONV34:%.*]] = trunc i64 [[SUB31]] to i32 -; CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[X]], [[CONV20]] -; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[MUL35]], [[CONV34]] -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD36]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[DST]], i64 [[IDXPROM]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -92,14 +68,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64 -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 @@ -153,31 +122,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[TMP5]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[IV]], [[M]] -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[DIV18:%.*]] = sdiv i64 [[M]], [[CONV6]] -; CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[DIV18]] to i32 -; CHECK-NEXT: [[MUL30:%.*]] = mul i64 [[DIV18]], [[CONV61]] -; CHECK-NEXT: [[SUB31:%.*]] = sub i64 [[IV]], [[MUL30]] -; CHECK-NEXT: [[CONV34:%.*]] = trunc i64 [[SUB31]] to i32 -; CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[X]], [[CONV20]] -; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[MUL35]], [[CONV34]] -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD36]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[DST]], i64 [[IDXPROM]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -277,7 +224,5 @@ exit: ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index a1b92e0658bd3..d5a6f3389176e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -12,15 +12,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_START]] to i32 -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP4]], i32 92) -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SMAX]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK2:%.*]] = icmp ult i64 [[TMP3]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK2]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -87,7 +79,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 08d39ea038586..62f30de45fa60 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -25,8 +25,6 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[M:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_BODY3_LR_PH_US_PREHEADER:%.*]], label [[FOR_END15:%.*]] ; CHECK: for.body3.lr.ph.us.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[M]] to i64 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: @@ -53,17 +51,11 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]] -; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 -; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP8]], [[K]] +; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP8]], [[K:%.*]] ; CHECK-NEXT: [[ARRAYIDX7_US]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV33]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP7]], [[TMP0]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[TMP9]], [[TMP7]] -; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] @@ -80,12 +72,12 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ] ; CHECK-NEXT: br label [[FOR_BODY3_US]] ; CHECK: for.end15.loopexit: ; CHECK-NEXT: br label [[FOR_END15]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 5eeebf2009a62..20c4b76f0feb6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -82,17 +82,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 18 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG]], i64 16 -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16 @@ -143,7 +134,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -242,88 +233,8 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 56 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3 -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]] -; CHECK-NEXT: [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4 -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12 -; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]] -; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16 -; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]] -; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20 -; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]] -; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]] -; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24 -; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]] -; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]] -; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]] -; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28 -; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]] -; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]] -; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]] -; CHECK-NEXT: [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4 -; CHECK-NEXT: [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1 -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]] -; CHECK-NEXT: [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]] -; CHECK-NEXT: [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]] -; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]] -; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]] -; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP44]], [[TMP57]] -; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]] -; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]] -; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]] -; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]] -; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]] -; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]] -; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP40]], [[TMP70]] -; CHECK-NEXT: [[TMP73:%.*]] = or i1 [[TMP72]], [[TMP34]] -; CHECK-NEXT: br i1 [[TMP73]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP42:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 5 @@ -376,7 +287,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index 2809a77b36f1a..b45cda704cf87 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -10,15 +10,7 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[UMIN2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[X]]) ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[N]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]] -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[X]]) -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[UMIN]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 @@ -42,8 +34,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[TMP31:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP29]]) ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP31]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP31]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -97,5 +89,5 @@ exit.2: ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 9217c905945ac..70f4c2cd8ed65 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -38,23 +38,8 @@ define i32 @main(ptr %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 ; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[UMIN1]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 24 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP6]]) -; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[UMIN]] -; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 -; CHECK-NEXT: [[TMP9:%.*]] = sub i8 [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i8 [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i32 [[TMP7]], 255 -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[DOTPROMOTED]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP14]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP13]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i32 [[TMP4]], 8 +; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP4]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP4]], [[N_MOD_VF]] @@ -77,8 +62,8 @@ define i32 @main(ptr %ptr) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 368842634c374..23eba1cd75dd7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -35,19 +35,7 @@ define void @test(ptr %p) { ; VEC-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 ; VEC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 ; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 -; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; VEC: vector.scevcheck: -; VEC-NEXT: [[TMP5:%.*]] = add i64 [[P1]], 16 -; VEC-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1)) -; VEC-NEXT: [[TMP6:%.*]] = add i64 [[UMAX]], -9 -; VEC-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[P1]] -; VEC-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 3 -; VEC-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP8]] to i16 -; VEC-NEXT: [[TMP11:%.*]] = add i16 2, [[TMP10]] -; VEC-NEXT: [[TMP12:%.*]] = icmp ult i16 [[TMP11]], 2 -; VEC-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP8]], 65535 -; VEC-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] -; VEC-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC: vector.ph: ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] @@ -84,9 +72,9 @@ define void @test(ptr %p) { ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: -; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ] -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ] -; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; VEC-NEXT: br label [[FOR_BODY:%.*]] ; VEC: for.body: ; VEC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IDX:%.*]], [[FOR_BODY]] ] @@ -126,5 +114,5 @@ exit: ; VEC: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VEC: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; VEC: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; VEC: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; VEC: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index 3813560d9300a..89f0b04a53f85 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -9,10 +9,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[ITER_CHECK:.*:]] ; I64-NEXT: [[TMP4:%.*]] = add i32 [[N]], 1 ; I64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP4]], 4 -; I64-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; I64: [[VECTOR_SCEVCHECK]]: -; I64-NEXT: [[TMP1:%.*]] = icmp slt i32 [[N]], 0 -; I64-NEXT: br i1 [[TMP1]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; I64-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; I64: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; I64-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ule i32 [[TMP4]], 16 ; I64-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll index 7e6b5e932b6c6..93a17f20b468c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" ; expression when vectorizing loop.2 define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr %dst) { ; CHECK-LABEL: define void @value_defined_in_loop1_used_for_trip_counts( -; CHECK-SAME: i32 [[START:%.*]], i1 [[C:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: i32 [[OFFSET_IDX:%.*]], i1 [[C:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], i32 0, i32 7 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64 @@ -33,10 +33,31 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_1_LOOPEXIT1:.*]] ; CHECK: [[LOOP_2_PREHEADER]]: +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IV_1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH2:.*]] +; CHECK: [[VECTOR_PH2]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], [[DOTCAST]] +; CHECK-NEXT: br label %[[VECTOR_BODY3:.*]] +; CHECK: [[VECTOR_BODY3]]: +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[TMP5]], align 2 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK4:.*]] +; CHECK: [[MIDDLE_BLOCK4]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_1_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK4]] ], [ 0, %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK4]] ], [ [[OFFSET_IDX]], %[[LOOP_2_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP_2:.*]] ; CHECK: [[LOOP_2]]: -; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ 0, %[[LOOP_2_PREHEADER]] ] -; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_2]] ], [ [[START]], %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_2]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_3_NEXT]] = add i32 [[IV_3]], 1 ; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], 1 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[IV_3]], 1 @@ -44,7 +65,7 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[ZEXT8]] ; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2 ; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_2]], [[IV_1_LCSSA]] -; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT:.*]] +; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[EXIT_1_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT_1:.*]] ; CHECK: [[EXIT_1_LOOPEXIT1]]: @@ -87,3 +108,8 @@ loop.3: exit.1: ret void } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll index d29719de79ffd..c9e94697f5006 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -44,8 +44,9 @@ define void @example() { ; FORCED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FORCED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop @@ -85,12 +86,7 @@ define void @test_replicating_store_x86_fp80_cost(i32 %n, ptr %dst) #0 { ; FORCED-NEXT: [[ENTRY:.*:]] ; FORCED-NEXT: [[TMP0:%.*]] = add i32 [[N]], 2 ; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 -; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; FORCED: [[VECTOR_SCEVCHECK]]: -; FORCED-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64 -; FORCED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; FORCED-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], 4294967295 -; FORCED-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; FORCED: [[VECTOR_PH]]: ; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 ; FORCED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 4445141549069..42c4d3a1ca4fe 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -12,11 +12,7 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]] ; MAINVF4IC1_EPI4: [[ITER_CHECK]]: ; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4 -; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]] -; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]: -; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0 -; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255 -; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]] +; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2:![0-9]+]] ; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4 ; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]] @@ -55,11 +51,7 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]] ; MAINVF4IC2_EPI4: [[ITER_CHECK]]: ; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4 -; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]] -; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]: -; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0 -; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255 -; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]] +; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2:![0-9]+]] ; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8 ; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]] @@ -130,7 +122,7 @@ exit: ; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} ; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} ; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} +; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META6]], [[META5]], [[META15:![0-9]+]]} ; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. ; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} @@ -148,6 +140,6 @@ exit: ; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} ; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3} ; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]} +; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META6]], [[META5]], [[META16:![0-9]+]]} ; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll index f79deac2a45b0..7660613d2a4b7 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll @@ -9,13 +9,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: iter.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[N]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -58,7 +52,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -96,5 +90,5 @@ exit: ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[PROF3]] = !{!"branch_weights", i32 4, i32 0} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index b0350cd884af1..97ca273c5bc56 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -783,12 +783,7 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] @@ -812,8 +807,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -862,12 +857,7 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] @@ -893,8 +883,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index b6fb378a042fd..f56c9ae82e793 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -3278,20 +3278,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] @@ -3317,9 +3304,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3345,23 +3332,12 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; IND: loop.preheader: ; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 -; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; IND: vector.scevcheck: -; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 -; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] -; IND-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8 -; IND-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]] -; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]] -; IND-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; IND-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]] -; IND-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 510 +; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2 ; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]] -; IND-NEXT: [[IND_END2:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]] +; IND-NEXT: [[TMP2:%.*]] = add i32 [[N_VEC]], [[EXT]] ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], @@ -3382,9 +3358,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3411,23 +3387,12 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL: loop.preheader: ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL: vector.scevcheck: -; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 -; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] -; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]] -; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]] -; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]] -; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 508 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4 ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]] -; UNROLL-NEXT: [[IND_END2:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]] +; UNROLL-NEXT: [[TMP2:%.*]] = add i32 [[N_VEC]], [[EXT]] ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], @@ -3451,9 +3416,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3481,20 +3446,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC: loop.preheader: ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL-NO-IC: vector.scevcheck: -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] @@ -3523,9 +3475,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3551,23 +3503,12 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE: loop.preheader: ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 -; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; INTERLEAVE: vector.scevcheck: -; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] -; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]] -; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]] -; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: -; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 504 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8 ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]] -; INTERLEAVE-NEXT: [[IND_END2:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]] +; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i32 [[N_VEC]], [[EXT]] ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT]], i64 0 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], @@ -3591,9 +3532,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3653,20 +3594,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] @@ -3693,9 +3621,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3723,24 +3651,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND: loop.preheader: ; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 -; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; IND: vector.scevcheck: -; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 -; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] -; IND-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8 -; IND-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]] -; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]] -; IND-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; IND-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]] -; IND-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 510 +; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2 ; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; IND-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]] -; IND-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]] -; IND-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2 +; IND-NEXT: [[EXT_MUL4:%.*]] = add i32 [[N_VEC]], [[EXT]] +; IND-NEXT: [[TMP2:%.*]] = shl i32 [[EXT_MUL4]], 2 ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], @@ -3761,9 +3678,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3792,24 +3709,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL: loop.preheader: ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL: vector.scevcheck: -; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 -; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] -; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]] -; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]] -; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]] -; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 508 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4 ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]] -; UNROLL-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]] -; UNROLL-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2 +; UNROLL-NEXT: [[EXT_MUL4:%.*]] = add i32 [[N_VEC]], [[EXT]] +; UNROLL-NEXT: [[TMP2:%.*]] = shl i32 [[EXT_MUL4]], 2 ; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], @@ -3833,9 +3739,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3865,20 +3771,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC: loop.preheader: ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL-NO-IC: vector.scevcheck: -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] @@ -3908,9 +3801,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3938,24 +3831,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE: loop.preheader: ; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 -; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; INTERLEAVE: vector.scevcheck: -; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 -; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] -; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]] -; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 -; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]] -; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: -; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 504 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8 ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]] -; INTERLEAVE-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]] -; INTERLEAVE-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2 +; INTERLEAVE-NEXT: [[EXT_MUL4:%.*]] = add i32 [[N_VEC]], [[EXT]] +; INTERLEAVE-NEXT: [[TMP2:%.*]] = shl i32 [[EXT_MUL4]], 2 ; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], @@ -3979,9 +3861,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4230,14 +4112,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-LABEL: @trunciv( ; CHECK-NEXT: for.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] @@ -4256,7 +4131,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4272,12 +4147,9 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-LABEL: @trunciv( ; IND-NEXT: for.body.preheader: ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2 -; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; IND: vector.scevcheck: -; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649 -; IND-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] +; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: -; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967294 +; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2 ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4294,7 +4166,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; IND-NEXT: br label [[FOR_BODY:%.*]] ; IND: for.body: ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4312,12 +4184,9 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-LABEL: @trunciv( ; UNROLL-NEXT: for.body.preheader: ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL: vector.scevcheck: -; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649 -; UNROLL-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: -; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967292 +; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4337,7 +4206,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4355,14 +4224,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-LABEL: @trunciv( ; UNROLL-NO-IC-NEXT: for.body.preheader: ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL-NO-IC: vector.scevcheck: -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] @@ -4384,7 +4246,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4400,12 +4262,9 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-LABEL: @trunciv( ; INTERLEAVE-NEXT: for.body.preheader: ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8 -; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; INTERLEAVE: vector.scevcheck: -; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649 -; INTERLEAVE-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: -; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967288 +; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8 ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -4425,7 +4284,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] ; INTERLEAVE: for.body: ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -5869,35 +5728,12 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]] -; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP:%.*]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> , [[DOTSPLAT]] @@ -5922,9 +5758,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -5944,32 +5780,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( ; IND-NEXT: entry: ; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 -; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; IND: vector.scevcheck: -; IND-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; IND-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 -; IND-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false) -; IND-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8 -; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]]) -; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; IND-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 -; IND-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]] -; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 -; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 -; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; IND-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; IND-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128 -; IND-NEXT: [[IDENT_CHECK:%.*]] = icmp ult i32 [[TMP13]], -256 -; IND-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[IDENT_CHECK]] -; IND-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2 ; IND-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; IND-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] +; IND-NEXT: [[IND_END:%.*]] = mul i32 [[STEP:%.*]], [[DOTCAST]] ; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; IND-NEXT: [[TMP15:%.*]] = mul nuw nsw <2 x i32> [[DOTSPLAT]], @@ -5993,9 +5808,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6015,31 +5830,10 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( ; UNROLL-NEXT: entry: ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL: vector.scevcheck: -; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; UNROLL-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 -; UNROLL-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false) -; UNROLL-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8 -; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]]) -; UNROLL-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; UNROLL-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 -; UNROLL-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]] -; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 -; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 -; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; UNROLL-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; UNROLL-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128 -; UNROLL-NEXT: [[IDENT_CHECK:%.*]] = icmp ult i32 [[TMP13]], -256 -; UNROLL-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[IDENT_CHECK]] -; UNROLL-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 -; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 +; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP:%.*]], i64 0 ; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] @@ -6066,9 +5860,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6088,34 +5882,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( ; UNROLL-NO-IC-NEXT: entry: ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; UNROLL-NO-IC: vector.scevcheck: -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 -; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) -; UNROLL-NO-IC-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; UNROLL-NO-IC-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32 -; UNROLL-NO-IC-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP:%.*]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] @@ -6143,9 +5914,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6165,31 +5936,10 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( ; INTERLEAVE-NEXT: entry: ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; INTERLEAVE: vector.scevcheck: -; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false) -; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8 -; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]]) -; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 -; INTERLEAVE-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]] -; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 -; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 -; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; INTERLEAVE-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; INTERLEAVE-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128 -; INTERLEAVE-NEXT: [[IDENT_CHECK:%.*]] = icmp ult i32 [[TMP13]], -256 -; INTERLEAVE-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[IDENT_CHECK]] -; INTERLEAVE-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 -; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 +; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP:%.*]], i64 0 ; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]] @@ -6216,9 +5966,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll index fdbc35b5e7a43..6a0f6f8cf094c 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll @@ -30,19 +30,7 @@ define void @wrap_around_scev_check(ptr noalias %a, ptr noalias %b, i8 %x, i8 %y ; CHECK: [[LOOP_PREHEADER]]: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i8 [[Y]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[WIDE_TRIP_COUNT]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 -; CHECK-NEXT: [[MUL1:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 2, i8 [[TMP1]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[X]] -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ugt i64 [[TMP0]], 255 -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP5]], [[TMP17]] -; CHECK-NEXT: br i1 [[TMP18]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 @@ -70,12 +58,12 @@ define void @wrap_around_scev_check(ptr noalias %a, ptr noalias %b, i8 %x, i8 %y ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[LOOP_PREHEADER]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[INDEX_011]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -122,12 +110,8 @@ exit: define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr noalias %x, ptr noalias %out) { ; CHECK-LABEL: define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count( ; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[OUT:%.*]]) { -; CHECK-NEXT: [[START:.*]]: -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 5, i4 -1) -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1 -; CHECK-NEXT: br i1 [[MUL_OVERFLOW]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -143,12 +127,11 @@ define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 12, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[IV_MUL5:%.*]] = mul nuw nsw i64 [[IV]], 5 ; CHECK-NEXT: [[IV_MUL5_MASKED:%.*]] = and i64 [[IV_MUL5]], 15 @@ -187,15 +170,7 @@ define void @wrap_predicate_for_interleave_group_unknown_trip_count(ptr noalias ; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[START:.*]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP0]] to i4 -; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 3, i4 [[TMP9]]) -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 15 -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]] -; CHECK-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 @@ -217,7 +192,7 @@ define void @wrap_predicate_for_interleave_group_unknown_trip_count(ptr noalias ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 8d3d0ff7a6406..d66e7edc69b9b 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -166,16 +166,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64 ; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255 -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -201,7 +192,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if2: ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 @@ -209,7 +200,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: +; CHECK: pred.store.continue3: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -217,8 +208,8 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index 404ef096b49b3..511a1aca681dc 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -16,15 +16,7 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK: preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], -2 -; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP1]] to i8 -; CHECK-NEXT: [[TMP8:%.*]] = add i8 2, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i8 [[TMP8]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP1]], 255 -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] @@ -46,8 +38,8 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[CONV:%.*]] = phi i64 [ [[CONV2:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -107,5 +99,5 @@ done: ; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"} ; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META3]], [[META2]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll index 496285a276923..4a7c4ba37568e 100644 --- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -4,28 +4,19 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end) { ; CHECK-LABEL: @test_ptr_iv_no_inbounds( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1_START7:%.*]] = ptrtoint ptr [[P1_START:%.*]] to i64 -; CHECK-NEXT: [[P1_END6:%.*]] = ptrtoint ptr [[P1_END:%.*]] to i64 -; CHECK-NEXT: [[P1_START4:%.*]] = ptrtoint ptr [[P1_START]] to i64 -; CHECK-NEXT: [[P1_END3:%.*]] = ptrtoint ptr [[P1_END]] to i64 +; CHECK-NEXT: [[P1_START4:%.*]] = ptrtoint ptr [[P1_START:%.*]] to i64 +; CHECK-NEXT: [[P1_END3:%.*]] = ptrtoint ptr [[P1_END:%.*]] to i64 ; CHECK-NEXT: [[P1_START2:%.*]] = ptrtoint ptr [[P1_START]] to i64 ; CHECK-NEXT: [[P1_END1:%.*]] = ptrtoint ptr [[P1_END]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P1_END6]], -4 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P1_START7]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P1_END3]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P1_START4]] ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[P1_END1]] to i2 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[P1_START2]] to i2 -; CHECK-NEXT: [[TMP6:%.*]] = sub i2 [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = zext i2 [[TMP6]] to i64 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[P1_END3]], -4 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[P1_START4]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[P1_END1]], -4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], [[P1_START2]] ; CHECK-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP9]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 4 @@ -60,8 +51,8 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_SCEVCHECK]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 1a1c05187590e..b4b384400a37b 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -150,15 +150,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] @@ -217,8 +209,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N6]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] @@ -913,7 +905,7 @@ exit: ; preds = %loop ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 08903351aa2f0..a621c0827467a 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -416,11 +416,7 @@ define void @pr43371_pgso(i16 %val) !prof !14 { ; NPGSO-LABEL: define void @pr43371_pgso( ; NPGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] { ; NPGSO-NEXT: [[ENTRY:.*:]] -; NPGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; NPGSO: [[VECTOR_SCEVCHECK]]: -; NPGSO-NEXT: [[TMP0:%.*]] = add i16 [[VAL]], 755 -; NPGSO-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP0]], [[VAL]] -; NPGSO-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NPGSO-NEXT: br label %[[VECTOR_PH:.*]] ; NPGSO: [[VECTOR_PH]]: ; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] ; NPGSO: [[VECTOR_BODY]]: @@ -435,19 +431,8 @@ define void @pr43371_pgso(i16 %val) !prof !14 { ; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; NPGSO: [[SCALAR_PH]]: -; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] ; NPGSO: [[FOR_COND_CLEANUP28]]: ; NPGSO-NEXT: unreachable -; NPGSO: [[FOR_BODY29]]: -; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] -; NPGSO-NEXT: [[ADD33:%.*]] = add i16 [[VAL]], [[I24_0170]] -; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 -; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] -; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 -; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 -; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP21:![0-9]+]] ; ; We do not want to generate SCEV predicates when optimising for size, because ; that will lead to extra code generation such as the SCEV overflow runtime @@ -545,7 +530,7 @@ define i32 @pr45526() optsize { ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512 -; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: [[TMP4:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP0]], i1 false) ; NPGSO-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1 @@ -643,7 +628,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; NPGSO-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 -; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; NPGSO-NEXT: br label %[[SCALAR_PH:.*]] @@ -654,7 +639,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ] ; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 ; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 -; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP24:![0-9]+]] +; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP23:![0-9]+]] ; NPGSO: [[EXIT]]: ; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] ; NPGSO-NEXT: ret i32 [[FOR_LCSSA]] @@ -786,7 +771,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 -; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[FOR_END:.*]] ; NPGSO: [[FOR_END]]: @@ -891,7 +876,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[SCALAR_PH]] ; NPGSO: [[SCALAR_PH]]: @@ -904,7 +889,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 ; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NPGSO: [[FOR_END]]: ; NPGSO-NEXT: ret void ; @@ -1112,11 +1097,10 @@ exit: ; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]} ; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} ; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]} -; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]} +; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]} ; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]} -; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]} -; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META17]], [[META16]]} +; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META16]]} +; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]], [[META17]]} ; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]} -; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]], [[META17]]} -; NPGSO: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]]} +; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index d96134e8adf1d..ccae1b4842217 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -290,12 +290,7 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64 ; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; DEFAULT: vector.scevcheck: -; DEFAULT-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) -; DEFAULT-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; DEFAULT-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -322,8 +317,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] @@ -344,12 +339,7 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64 ; STRIDED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) ; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 -; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; STRIDED: vector.scevcheck: -; STRIDED-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) -; STRIDED-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; STRIDED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; STRIDED-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -376,8 +366,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ] -; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; STRIDED-NEXT: br label [[FOR_BODY:%.*]] ; STRIDED: for.body: ; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index 623a9435edec1..1cd1b104ef651 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -40,35 +40,12 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]] -; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP:%.*]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] @@ -90,8 +67,8 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -162,34 +139,12 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 false -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sext i8 [[TMP1]] to i32 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP13]], [[IDENT_CHECK]] -; CHECK-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP:%.*]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> , [[DOTSPLAT]] @@ -211,8 +166,8 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -358,26 +313,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[CSTEP]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[CSTEP]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i8 [[TMP1]], i8 [[CSTEP]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = sub i8 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP5]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] @@ -404,8 +340,8 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr34681.ll b/llvm/test/Transforms/LoopVectorize/pr34681.ll index a04a4e9eea6fe..4e6809102d685 100644 --- a/llvm/test/Transforms/LoopVectorize/pr34681.ll +++ b/llvm/test/Transforms/LoopVectorize/pr34681.ll @@ -31,22 +31,7 @@ define i32 @foo1(i32 %N, ptr nocapture readnone %A, ptr nocapture readonly %B, i ; CHECK-NEXT: br i1 [[CMP8]], [[FOR_END:label %.*]], label %[[FOR_BODY_LR_PH:.*]] ; CHECK: [[FOR_BODY_LR_PH]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[N]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[N]] -; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[J]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[J]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP4]], [[J]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[J]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] @@ -142,16 +127,7 @@ define i32 @foo2(i16 zeroext %N, ptr nocapture readnone %A, ptr nocapture readon ; CHECK-NEXT: br i1 [[CMP11]], [[FOR_END:label %.*]], label %[[FOR_BODY_LR_PH:.*]] ; CHECK: [[FOR_BODY_LR_PH]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[CONV]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[CONV]], -1 -; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[CONV]], i32 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[J]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[J]] -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[CONV]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[CONV]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 33b3d263e634a..338090b450071 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -20,17 +20,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN1]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[START]], [[SMIN]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16 -; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -40,7 +30,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 @@ -50,11 +40,11 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] -; CHECK: [[PRED_STORE_IF2]]: +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] -; CHECK: [[PRED_STORE_CONTINUE3]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; CHECK: [[PRED_STORE_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1 @@ -98,17 +88,7 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN1]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[START]], [[SMIN]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16 -; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 7a048a9a607ba..d58cfe8fe9ea6 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -20,16 +20,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA2]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 -1, [[ARR1]] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[T1_0_LCSSA2]] -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = add i8 1, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP5]], 255 -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] -; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] @@ -54,7 +45,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index 239ad5b29cec5..70e6699087a08 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -366,14 +366,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-SAME: ptr captures(none) [[A:%.*]], i32 [[START:%.*]], i64 [[K:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] @@ -392,7 +385,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -409,33 +402,26 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; DEBUGLOC-SAME: ptr captures(none) [[A:%.*]], i32 [[START:%.*]], i64 [[K:%.*]]) !dbg [[DBG67:![0-9]+]] { ; DEBUGLOC-NEXT: [[ENTRY:.*]]: ; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K]], 4, !dbg [[DBG74:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !dbg [[DBG74]] -; DEBUGLOC: [[VECTOR_SCEVCHECK]]: -; DEBUGLOC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1, !dbg [[DBG74]] -; DEBUGLOC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32, !dbg [[DBG74]] -; DEBUGLOC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0, !dbg [[DBG74]] -; DEBUGLOC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295, !dbg [[DBG74]] -; DEBUGLOC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]], !dbg [[DBG74]] -; DEBUGLOC-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG75:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG74]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] -; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG75]] +; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG74]] ; DEBUGLOC: [[VECTOR_BODY]]: -; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG75]] +; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG75:![0-9]+]] ; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG76:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32, !dbg [[DBG76]] -; DEBUGLOC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]], !dbg [[DBG77:![0-9]+]] -; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4, !dbg [[DBG78:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32, !dbg [[DBG76]] +; DEBUGLOC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP0]], !dbg [[DBG77:![0-9]+]] +; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4, !dbg [[DBG78:![0-9]+]] ; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG75]] ; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]] -; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: ; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]], !dbg [[DBG79]] ; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG79]] ; DEBUGLOC: [[SCALAR_PH]]: -; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], !dbg [[DBG75]] +; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG75]] ; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG74]] ; DEBUGLOC: [[LOOP]]: ; DEBUGLOC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG75]] @@ -595,7 +581,7 @@ exit: ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} ;. @@ -679,7 +665,7 @@ exit: ; DEBUGLOC: [[LOOP80]] = distinct !{[[LOOP80]], [[META26]], [[META27]]} ; DEBUGLOC: [[DBG81]] = !DILocation(line: 36, column: 1, scope: [[DBG67]]) ; DEBUGLOC: [[DBG82]] = !DILocation(line: 37, column: 1, scope: [[DBG67]]) -; DEBUGLOC: [[LOOP83]] = distinct !{[[LOOP83]], [[META26]]} +; DEBUGLOC: [[LOOP83]] = distinct !{[[LOOP83]], [[META27]], [[META26]]} ; DEBUGLOC: [[DBG84]] = !DILocation(line: 39, column: 1, scope: [[DBG67]]) ; DEBUGLOC: [[DBG85]] = distinct !DISubprogram(name: "widen_intrinsic_dbg", linkageName: "widen_intrinsic_dbg", scope: null, file: [[META1]], line: 40, type: [[META6]], scopeLine: 40, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META86:![0-9]+]]) ; DEBUGLOC: [[META86]] = !{[[META87]], [[META88]], [[META89]], [[META90]], [[META91]], [[META92]], [[META93]]} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll index e4322cfcc00ac..5302bf5abb03b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll @@ -525,10 +525,7 @@ define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) { ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]] -; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -551,8 +548,8 @@ define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index d3e291e4f3ed2..4056563eea62d 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -118,12 +118,7 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-LABEL: define i32 @reverse_induction_i16( ; CHECK-SAME: i16 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[STARTVAL]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], 1023 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[TMP0]] -; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -151,22 +146,8 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[LOOPEND:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[ADD_I7:%.*]] = phi i16 [ [[STARTVAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ADD_I]] = add i16 [[ADD_I7]], -1 -; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]] -; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4 -; CHECK-NEXT: [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]] -; CHECK-NEXT: [[INC4]] = add i32 [[I_06]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[LOOPEND]]: -; CHECK-NEXT: [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP17]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[INC_REDUX_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP17]] ; entry: br label %for.body @@ -232,7 +213,7 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END:.*]] ; CHECK: [[WHILE_END]]: @@ -285,7 +266,7 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END:.*]] ; CHECK: [[WHILE_END]]: @@ -315,7 +296,6 @@ while.end: ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll index 5a1844ac450e7..cb32a69ab4ae0 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll @@ -17,14 +17,7 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr ; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[END1]] to i3 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[START_1_INT]] to i3 -; CHECK-NEXT: [[TMP7:%.*]] = sub i3 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = zext i3 [[TMP7]] to i64 -; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] @@ -54,8 +47,8 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ], [ [[START_1]], %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ [[START_2_DIFF]], %[[ENTRY]] ], [ [[START_2_DIFF]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ [[START_2_DIFF]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index 1035642dd78e0..95c5e88f0fcfa 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -17,10 +17,6 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 @@ -44,7 +40,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -87,10 +83,6 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[A1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 @@ -114,7 +106,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -159,14 +151,6 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i2 -; CHECK-NEXT: [[TMP3:%.*]] = add i2 1, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i2 [[TMP3]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP1]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], 16 @@ -192,7 +176,7 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -350,11 +334,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) { ; CHECK-LABEL: @clamped_index_equal_dependence( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] @@ -373,7 +353,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll index 648ebc7e6c3a5..5abb621f579bf 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll @@ -450,22 +450,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) { ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295 -; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[SMAX]] to i32 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], [[OFFSET]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295 -; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]] -; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll index 479d859a9287c..527128d613624 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll @@ -21,38 +21,29 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT_1:%.*]] ; CHECK: exit.1: -; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -1) -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX1]], 2 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -1) -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMAX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 -; CHECK-NEXT: [[TMP5:%.*]] = add i8 1, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], 255 -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH2:%.*]] -; CHECK: vector.ph2: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH1:%.*]] +; CHECK: vector.ph1: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[N_VEC]] to i8 -; CHECK-NEXT: br label [[VECTOR_BODY3:%.*]] -; CHECK: vector.body3: -; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ 0, [[VECTOR_PH2]] ], [ [[INDEX_NEXT5:%.*]], [[VECTOR_BODY3]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8 -; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP10]] -; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT5]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK6:%.*]], label [[VECTOR_BODY3]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: middle.block6: +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[N_VEC]] to i8 +; CHECK-NEXT: br label [[VECTOR_BODY2:%.*]] +; CHECK: vector.body2: +; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY2]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX3]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP4]] +; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK5:%.*]], label [[VECTOR_BODY2]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: middle.block5: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_2:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP9]], [[MIDDLE_BLOCK6]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP3]], [[MIDDLE_BLOCK5]] ], [ 0, [[EXIT_1]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 163faa2254700..39be47a179e98 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -6,19 +6,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) { ; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG]], 1 -; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[ARG]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ADD]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 [[ADD]] -; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP2]], i32 1023) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i1 [[TMP4]], i1 false -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -45,8 +33,9 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) { ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: %add = add i32 %arg, 1 @@ -93,7 +82,7 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK: [[SCALAR_PH]]: @@ -124,7 +113,6 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64 -; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A3]], 16 ; CHECK-NEXT: [[UMAX4:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1)) ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMAX4]], -9 @@ -132,19 +120,7 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[A1]], 16 -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1)) -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[UMAX]], -9 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[A1]] -; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 3 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i16 -; CHECK-NEXT: [[TMP10:%.*]] = add i16 2, [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i16 [[TMP10]], 2 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 65535 -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[C2]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], 32 @@ -165,7 +141,7 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -226,7 +202,7 @@ define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress { ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOPEXIT]], label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index ae03f2426a800..be71ea898a935 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -143,16 +143,7 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64 ; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255 -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] @@ -179,8 +170,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[FOUND:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -634,7 +625,7 @@ exit: ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll index 22cf860c8b58c..a434c86fc54dd 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll @@ -7,15 +7,9 @@ define void @expand(ptr %src, ptr %dst, i64 %0) { ; CHECK-LABEL: define void @expand( ; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[TMP0:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP1]], i64 1000) -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SMAX]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0]], 4 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 8 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 ; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP6]], i64 1000) ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[SMAX6]], 4 @@ -26,22 +20,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) { ; CHECK: [[OUTER_HEADER]]: ; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP3]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP3]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP1]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP7]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP5]] @@ -50,7 +29,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[SRC]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[SRC]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer @@ -68,10 +47,10 @@ define void @expand(ptr %src, ptr %dst, i64 %0) { ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP27]] -; CHECK-NEXT: store double [[TMP19]], ptr [[TMP31]], align 8, !alias.scope [[META3]] -; CHECK-NEXT: store double [[TMP19]], ptr [[TMP26]], align 8, !alias.scope [[META3]] -; CHECK-NEXT: store double [[TMP19]], ptr [[TMP33]], align 8, !alias.scope [[META3]] -; CHECK-NEXT: store double [[TMP19]], ptr [[TMP28]], align 8, !alias.scope [[META3]] +; CHECK-NEXT: store double [[TMP5]], ptr [[TMP31]], align 8, !alias.scope [[META3]] +; CHECK-NEXT: store double [[TMP5]], ptr [[TMP26]], align 8, !alias.scope [[META3]] +; CHECK-NEXT: store double [[TMP5]], ptr [[TMP33]], align 8, !alias.scope [[META3]] +; CHECK-NEXT: store double [[TMP5]], ptr [[TMP28]], align 8, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP29:%.*]] = or disjoint <4 x i64> [[TMP20]], splat (i64 1) ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP29]], i32 0 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP29]], i32 1 @@ -93,7 +72,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP18]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[OUTER_HEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP18]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[OUTER_HEADER]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[INNER:.*]] ; CHECK: [[INNER]]: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], %[[INNER]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll index f42101ffe89aa..d2113c72ba17a 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll @@ -23,35 +23,61 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer ; CHECK-LABEL: define dso_local void @foo( ; CHECK-SAME: i32 noundef [[ARG:%.*]], ptr noundef nonnull writeonly align 4 captures(none) dereferenceable(8) [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[I3:%.*]] = alloca [[T0:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[I3]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: store i64 180388626456, ptr [[I3]], align 8 ; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG]], 128 ; CHECK-NEXT: [[I10:%.*]] = shl nsw i32 [[I9]], 7 ; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255 -; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]] -; CHECK: [[BB12_LOOPEXIT:.*]]: -; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64 -; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32 -; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64 -; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13_PREHEADER:.*]] +; CHECK: [[BB13_PREHEADER]]: +; CHECK-NEXT: [[ARG_OFF10:%.*]] = add i32 [[ARG]], 127 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[ARG_OFF10]], 255 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[BB13:.*]], label %[[VECTOR_BODY_PREHEADER:.*]] +; CHECK: [[VECTOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[I3]], i64 -12 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[I3]], i64 -28 +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !tbaa [[INT_TBAA5:![0-9]+]] +; CHECK-NEXT: [[DOTPROMOTED14:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[REVERSE915:%.*]] = phi <4 x i32> [ [[REVERSE9:%.*]], %[[VECTOR_BODY]] ], [ [[DOTPROMOTED14]], %[[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = phi <4 x i32> [ [[REVERSE8:%.*]], %[[VECTOR_BODY]] ], [ [[DOTPROMOTED]], %[[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], [ , %[[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[REVERSE8]] = mul nsw <4 x i32> [[WIDE_LOAD13]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[STEP_ADD]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[REVERSE9]] = mul nsw <4 x i32> [[REVERSE915]], [[TMP3]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[I10]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[BB12_LOOPEXIT_LOOPEXIT12:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[BB12_LOOPEXIT_LOOPEXIT12]]: +; CHECK-NEXT: store <4 x i32> [[REVERSE8]], ptr [[TMP6]], align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: store <4 x i32> [[REVERSE9]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: br label %[[BB12_LOOPEXIT:.*]] +; CHECK: [[BB12_LOOPEXIT]]: +; CHECK-NEXT: [[DOTPRE:%.*]] = load i64, ptr [[I3]], align 8, !tbaa [[CHAR_TBAA13:![0-9]+]] ; CHECK-NEXT: br label %[[BB12]] ; CHECK: [[BB12]]: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ] -; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ [[DOTPRE]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ] +; CHECK-NEXT: store i64 [[TMP5]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA13]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[I3]]) #[[ATTR2]] ; CHECK-NEXT: ret void ; CHECK: [[BB13]]: -; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ] -; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ] -; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ] -; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]] -; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1 -; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]] -; CHECK-NEXT: [[I24_1:%.*]] = or disjoint i32 [[I4_05]], 2 -; CHECK-NEXT: [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]] -; CHECK-NEXT: [[I24_2:%.*]] = or disjoint i32 [[I4_05]], 3 -; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]] -; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4 +; CHECK-NEXT: [[I24_2:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB13_PREHEADER]] ] +; CHECK-NEXT: [[I15:%.*]] = and i32 [[I24_2]], 1 +; CHECK-NEXT: [[I16:%.*]] = zext nneg i32 [[I15]] to i64 +; CHECK-NEXT: [[I5_I_I:%.*]] = getelementptr inbounds nuw i32, ptr [[I3]], i64 [[I16]] +; CHECK-NEXT: [[I21_1:%.*]] = load i32, ptr [[I5_I_I]], align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]] +; CHECK-NEXT: store i32 [[I21_3]], ptr [[I5_I_I]], align 4, !tbaa [[INT_TBAA5]] +; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I24_2]], 1 ; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]] -; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP14:![0-9]+]] ; bb: %i = alloca i32, align 4 @@ -168,10 +194,14 @@ attributes #3 = { nounwind } !15 = !{!16, !16, i64 0} !16 = !{!"long", !7, i64 0} ;. -; CHECK: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} -; CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0} -; CHECK: [[META7]] = !{!"Simple C++ TBAA"} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]} -; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"} -; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0} +; CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0} +; CHECK: [[META8]] = !{!"Simple C++ TBAA"} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]]} +; CHECK: [[META10]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[META11]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META12]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[CHAR_TBAA13]] = !{[[META7]], [[META7]], i64 0} +; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META10]], [[META12]], [[META11]]} ;.