diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a7720b11a2b35..4c10a601095f9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7299,6 +7299,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
  VPlanTransforms::narrowInterleaveGroups(
      BestVPlan, BestVF,
      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
+  VPlanTransforms::cse(BestVPlan);
  VPlanTransforms::removeDeadRecipes(BestVPlan);

  VPlanTransforms::convertToConcreteRecipes(BestVPlan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 11a7d8b339ae9..9616e0a7d1687 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -897,6 +897,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
    return R && classof(R);
  }

+  static inline bool classof(const VPSingleDefRecipe *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && classof(R);
+  }
+
  void execute(VPTransformState &State) override = 0;
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f75b2f21b6f1b..dab2027e76192 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1772,6 +1772,93 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
  }
}

+namespace {
+struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
+  static bool isSentinel(const VPSingleDefRecipe *Def) {
+    return Def == getEmptyKey() || Def == getTombstoneKey();
+  }
+
+  /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
+  /// Returns an optional pair, where the first element indicates whether it is
+  /// an intrinsic ID.
+  static std::optional<std::pair<bool, unsigned>>
+  getOpcodeOrIntrinsicID(const VPRecipeBase &R) {
+    return TypeSwitch<const VPRecipeBase *,
+                      std::optional<std::pair<bool, unsigned>>>(&R)
+        .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+              VPReplicateRecipe>(
+            [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+        .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+          return std::make_pair(true, I->getVectorIntrinsicID());
+        })
+        .Default([](auto *) { return std::nullopt; });
+  }
+
+  static bool canHandle(const VPSingleDefRecipe *Def) {
+    return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+               VPWidenGEPRecipe, VPWidenIntrinsicRecipe, VPReplicateRecipe>(
+               Def) &&
+           !Def->mayReadFromMemory();
+  }
+
+  /// Hash the underlying data of a VPSingleDefRecipe pointer, instead of
+  /// hashing the pointer itself.
+  static unsigned getHashValue(const VPSingleDefRecipe *Def) {
+    return hash_combine(Def->getVPDefID(), getOpcodeOrIntrinsicID(*Def),
+                        vputils::isSingleScalar(Def),
+                        hash_combine_range(Def->operands()));
+  }
+
+  static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
+    if (isSentinel(L) || isSentinel(R))
+      return L == R;
+    bool Result = L->getVPDefID() == R->getVPDefID() &&
+                  getOpcodeOrIntrinsicID(*L) == getOpcodeOrIntrinsicID(*R) &&
+                  vputils::isSingleScalar(L) == vputils::isSingleScalar(R) &&
+                  equal(L->operands(), R->operands());
+    assert(!Result || getHashValue(L) == getHashValue(R));
+    return Result;
+  }
+};
+} // end anonymous namespace
+
+/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the
+/// \p Plan.
+void VPlanTransforms::cse(VPlan &Plan) {
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  if (!LoopRegion)
+    return;
+  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+      vp_depth_first_shallow(Plan.getEntry()));
+  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+      vp_depth_first_shallow(LoopRegion->getEntry()));
+
+  // There is existing logic to sink instructions into replicate regions, and
+  // we'd be undoing that work if we went through replicate regions. Hence,
+  // don't CSE in replicate regions.
+  DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, VPCSEDenseMapInfo> CSEMap;
+  VPTypeAnalysis TypeInfo(Plan);
+  for (VPBasicBlock *VPBB :
+       concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
+      if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
+        continue;
+      if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
+        if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
+          continue;
+        // Drop poison-generating flags when reusing a value.
+        if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
+          RFlags->dropPoisonGeneratingFlags();
+        Def->replaceAllUsesWith(V);
+        continue;
+      }
+      CSEMap[Def] = Def;
+    }
+  }
+}
+
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
  VPBasicBlock *Preheader = Plan.getVectorPreheader();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5de1483783a42..6a63e95ac8b86 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -248,6 +248,10 @@ struct VPlanTransforms {
  /// removing dead edges to their successors.
  static void removeBranchOnConst(VPlan &Plan);

+  /// Perform common-subexpression-elimination, which is best done after the
+  /// \p Plan is executed.
+  static void cse(VPlan &Plan);
+
  /// If there's a single exit block, optimize its phi recipes that use exiting
  /// IV values by feeding them precomputed end values instead, possibly taken
  /// one step backwards.
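The core of the change is a textbook hash-consing CSE: each eligible single-def recipe is used as its own key in a DenseMap whose hashing and equality are structural (recipe kind, opcode or intrinsic ID, single-scalar-ness, and the operand list) rather than pointer identity, so the first recipe of a given shape becomes canonical and later duplicates are replaced with it. The standalone C++ sketch below mirrors that pattern on a toy expression node; every name in it (Expr, ExprHash, ExprEq, canonicalize) is invented for illustration, and none of it is part of the patch or of the VPlan API:

    // Minimal sketch of the hash-consing CSE pattern used above, on a toy
    // expression node. Illustrative only: it assumes nothing beyond the C++
    // standard library, and none of these names exist in LLVM.
    #include <cstddef>
    #include <functional>
    #include <unordered_map>
    #include <vector>

    struct Expr {
      unsigned Opcode;               // stands in for the recipe's VPDefID/opcode data
      std::vector<const Expr *> Ops; // stands in for the recipe's operand list
    };

    // Structural hash: combine the opcode with the operand pointers, analogous
    // to hash_combine(Def->getVPDefID(), ..., hash_combine_range(operands)).
    struct ExprHash {
      std::size_t operator()(const Expr *E) const {
        std::size_t H = std::hash<unsigned>()(E->Opcode);
        for (const Expr *Op : E->Ops)
          H = H * 31 + std::hash<const Expr *>()(Op);
        return H;
      }
    };

    // Structural equality, analogous to VPCSEDenseMapInfo::isEqual: same
    // opcode and identical operand lists.
    struct ExprEq {
      bool operator()(const Expr *L, const Expr *R) const {
        return L->Opcode == R->Opcode && L->Ops == R->Ops;
      }
    };

    using ExprCSEMap =
        std::unordered_map<const Expr *, const Expr *, ExprHash, ExprEq>;

    // The first expression seen with a given (opcode, operands) shape becomes
    // the canonical instance; a later structural duplicate maps back to it,
    // which is the point where the real pass calls replaceAllUsesWith.
    const Expr *canonicalize(const Expr *E, ExprCSEMap &Map) {
      return Map.try_emplace(E, E).first->second;
    }

Two details of the real pass that the sketch omits: a duplicate is only folded when VPTypeAnalysis infers the same scalar type for both recipes, and the surviving recipe has its poison-generating flags dropped, since it now also stands in for contexts where those flags were not guaranteed to hold.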
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll index 221d944e1bc2d..4263d3a213467 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll @@ -54,14 +54,12 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x double> [ [[TMP20]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP25]] -; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP26]], i32 8, <4 x i1> [[TMP4]]) +; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP7]], i32 8, <4 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq <4 x double> [[TMP24]], zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP5]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP29]], i32 0 -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP31]], i64 [[TMP25]], i64 [[TMP6]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP31]], i64 [[TMP6]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST_1]], i64 [[PREDPHI]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> splat (i32 10), ptr [[TMP32]], i32 4, <4 x i1> [[TMP30]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 5b15896da8d78..87b8c4af1e0c7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -330,11 +330,10 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]] ; CHECK-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT]], [[LOOP]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_1]] ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV_2]], 10 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index efcd810203a44..e6db97d75fb72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -30,12 +30,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; FORCED: [[VECTOR_PH]]: ; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 -; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 +; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT2]] ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -86,16 +83,13 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { ; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; FORCED: [[VECTOR_PH]]: ; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 +; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] -; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) +; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT2]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 927d1b82bc488..f5ab340d4795b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -21,12 +21,11 @@ define double @test_reduction_costs() {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_1:.*]]
; CHECK: [[LOOP_1]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
+; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 3cef1f6e03ff9..c156347e6025b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -44,10 +44,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i16>
; DEFAULT-NEXT: [[TMP28:%.*]] = mul <vscale x 8 x i16> [[TMP26]], [[TMP13]]
; DEFAULT-NEXT: [[TMP29:%.*]] = mul <vscale x 8 x i16> [[TMP27]], [[TMP13]]
-; DEFAULT-NEXT: [[TMP30:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
-; DEFAULT-NEXT: [[TMP31:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i16>
-; DEFAULT-NEXT: [[TMP32:%.*]] = or <vscale x 8 x i16> [[TMP28]], [[TMP30]]
-; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 8 x i16> [[TMP29]], [[TMP31]]
+; DEFAULT-NEXT: [[TMP32:%.*]] = or <vscale x 8 x i16> [[TMP28]], [[TMP26]]
+; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 8 x i16> [[TMP29]], [[TMP27]]
; DEFAULT-NEXT: [[TMP34:%.*]] = lshr <vscale x 8 x i16> [[TMP32]], splat (i16 1)
; DEFAULT-NEXT: [[TMP35:%.*]] = lshr <vscale x 8 x i16> [[TMP33]], splat (i16 1)
; DEFAULT-NEXT: [[TMP36:%.*]] = trunc <vscale x 8 x i16> [[TMP34]] to <vscale x 8 x i8>
@@ -118,8 +116,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; PRED-NEXT: [[TMP17:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i16>
; PRED-NEXT: [[TMP22:%.*]] = mul <vscale x 16 x i16> [[TMP17]], [[TMP16]]
-; PRED-NEXT: [[TMP24:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i16>
-; PRED-NEXT: [[TMP20:%.*]] = or <vscale x 16 x i16> [[TMP22]], [[TMP24]]
+; PRED-NEXT: [[TMP20:%.*]] = or <vscale x 16 x i16> [[TMP22]], [[TMP17]]
; PRED-NEXT: [[TMP21:%.*]] = lshr <vscale x 16 x i16> [[TMP20]], splat (i16 1)
; PRED-NEXT: [[TMP23:%.*]] = trunc <vscale x 16 x i16> [[TMP21]] to <vscale x 16 x i8>
; PRED-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
index 1c4b62183d939..b6874d1025884 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
@@ -19,7 +19,6 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -29,7 +28,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP2]], <16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP8]], align 1
@@ -58,7 +57,6 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
@@ -76,7 +74,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> [[TMP28]], i32 1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
-; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP23]], <vscale x 2 x i32> [[TMP29]])
+; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
@@ -155,7 +153,6 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -165,7 +162,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP2]], <16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]])
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP26]], align 1
@@ -194,7 +191,6 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
@@ -212,7 +208,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> [[TMP28]], i32 1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
-; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP23]], <vscale x 2 x i32> [[TMP29]])
+; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
index 8c4eba61b6ba2..09a1c17087af2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
@@ -9,9 +9,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index aed1c3d9fcc4c..dac0b3ee4511d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -65,9 +65,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; TFCOMMON-NEXT:
[[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 ; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] -; TFCOMMON-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3]] ; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1 +; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1 ; TFCOMMON-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer ; TFCOMMON-NEXT: [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer ; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer @@ -107,17 +106,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]] -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1 -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]] -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]] +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1 -; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = fcmp ule <2 x double> [[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK2]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP18]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 7028678b338f0..98f5a28a1fa43 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -980,17 +980,14 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr 
noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]] ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: -; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP19:.*]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ] +; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP18:.*]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP18]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP18]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]] -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]] -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00 +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00 -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = xor i1 [[TMP7]], true ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP8]], true -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP9]], i1 false +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP10]], i1 false ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP10]], i1 false ; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], double 1.000000e+00, double 0.000000e+00 ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP12]], double 1.000000e+00, double 0.000000e+00 @@ -1000,11 +997,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]] -; TFA_INTERLEAVE: [[BB18]]: +; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB15:.*]], label %[[TMP18]] +; TFA_INTERLEAVE: [[BB15]]: ; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8 -; TFA_INTERLEAVE-NEXT: br label %[[TMP19]] -; TFA_INTERLEAVE: [[TMP19]]: +; TFA_INTERLEAVE-NEXT: br label %[[TMP18]] +; TFA_INTERLEAVE: [[TMP18]]: ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 5e99425c1482c..ae28eb4c3a49d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -39,8 +39,7 @@ define void @foo_i32(i32 %n) { ; CHECK-NEXT: 
[[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32>
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: br label %[[FOR_BODY31:.*]]
; CHECK: [[FOR_BODY31]]:
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index c1725cf498b74..65021638b127d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -977,10 +977,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
-; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul nuw i32 [[TMP19]], 8
-; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = extractelement <vscale x 8 x i32> [[TMP17]], i32 [[TMP21]]
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1
@@ -1021,10 +1017,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
-; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = mul nuw i32 [[TMP23]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sub i32 [[TMP31]], 1
-; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP25]]
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sub i32 [[TMP28]], 1
@@ -1059,10 +1051,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-MAXBW: middle.block:
-; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = mul nuw i32 [[TMP26]], 8
-; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = sub i32 [[TMP27]], 1
-; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP28]]
; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index c6e10c50c6f44..66bb8735c4ce2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -152,11 +152,10 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TMP4]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP5:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2]] to <16 x i8>
-; DEFAULT-NEXT: [[TMP8:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]]
; DEFAULT-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]]
; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16
-; DEFAULT-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1, !alias.scope [[META8:![0-9]+]], !noalias [[META5]]
+; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 1, !alias.scope [[META8:![0-9]+]], !noalias [[META5]]
; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1, !alias.scope [[META8]], !noalias [[META5]]
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index b007db973ba5c..169ddf023cb91 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -17,12 +17,9 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
index e04b550a004ad..ab975a6b9ffcf 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
@@ -972,8 +972,7 @@ define void @test_2xi64_sub_of_wide_loads(ptr noalias %data, ptr noalias %A, ptr
; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; VF4-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1
; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
index 2c85b75dda018..36c3a2a612d82 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
@@ -16,12 +16,10 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
-; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
-; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 59f1f4a6e54c5..7cb7a5e4fea86 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -293,11 +293,10 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED: middle.block:
; NOSTRIDED-NEXT: br label [[EXIT:%.*]]
; NOSTRIDED: scalar.ph:
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
; NOSTRIDED: loop:
-; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
@@ -574,11 +573,10 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED: middle.block:
; NOSTRIDED-NEXT: br label [[EXIT:%.*]]
; NOSTRIDED: scalar.ph:
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0,
[[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: -; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ] ; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]] ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index 04e0dafba6b86..7ae83d484e831 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -98,11 +98,10 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOPS:.*]] ; CHECK: [[LOOPS]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOPS]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ] ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], 0 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[IV]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 1225e0fc583e1..ec9bdbd337187 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -38,8 +38,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[FOR_BODY31:.*]] ; CHECK: [[FOR_BODY31]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] @@ -71,8 +70,7 @@ define void @foo(i32 %n) { ; AVX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> [[VEC_IND]] ; AVX-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> ; AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true)) -; AVX-NEXT: [[TMP7:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> -; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[TMP7]], [[BROADCAST_SPLAT]] +; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; AVX-NEXT: br label 
%[[FOR_BODY31:.*]] ; AVX: [[FOR_BODY31]]: ; AVX-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 0b61f207d041a..bbd94117ce6b5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -39,9 +39,8 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK: [[PRED_UREM_IF5]]: ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) ; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) -; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP12]] +; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 99650592d2dea..4755931e33d91 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -140,15 +140,12 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]]) -; CHECK-NEXT: [[TMP35:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) -; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP35]] +; CHECK-NEXT: [[TMP49:%.*]] = or <16 x i64> [[VEC_IND37]], splat (i64 1) +; CHECK-NEXT: [[TMP36:%.*]] = add <16 x i64> [[TMP30]], [[TMP49]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP36]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[TMP34]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP38:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) -; CHECK-NEXT: [[TMP39:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP39]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP40]], i32 8, <16 x i1> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[INDEX_NEXT39]] = add nuw i64 [[INDEX34]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) ; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) @@ -192,15 +189,12 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> 
splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]]) -; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) -; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP49]] +; CHECK-NEXT: [[TMP54:%.*]] = or <8 x i64> [[VEC_IND70]], splat (i64 1) +; CHECK-NEXT: [[TMP50:%.*]] = add <8 x i64> [[TMP44]], [[TMP54]] ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP50]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[TMP48]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[BROADCAST_SPLAT73]]) -; CHECK-NEXT: [[TMP52:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) -; CHECK-NEXT: [[TMP53:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP53]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP54]], i32 8, <8 x i1> [[BROADCAST_SPLAT73]]) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[BROADCAST_SPLAT73]]) ; CHECK-NEXT: [[INDEX_NEXT74]] = add nuw i64 [[INDEX61]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 373c8e0b385c2..fe77d58708aa9 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -695,12 +695,10 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP20]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP29]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP19]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 74df675a75cbd..f9b3d37607092 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -1024,11 +1024,10 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) {
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT: br label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -1090,11 +1089,10 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT: br label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 715ea1c51aba6..b815c9d41e35c 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -122,12 +122,11 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
 ; CHECK-NEXT: br label [[EXIT:%.*]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: [[SCALAR_RECUR6:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SCALAR_RECUR6]]
 ; CHECK-NEXT: [[MULADD:%.*]] = call float @llvm.fmuladd.f32(float [[SCALAR_RECUR]], float [[NEG]], float 0.000000e+00)
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 0526f1b4ed1e3..ddc7d4a66e6ae 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1195,11 +1195,10 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-IC: middle.block:
 ; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]]
 ; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
 ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-IC: scalar.body:
-; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1
 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1222,11 +1221,10 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-VF: middle.block:
 ; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]]
 ; UNROLL-NO-VF: scalar.ph:
-; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
 ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF: scalar.body:
-; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1
 ; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1249,11 +1247,10 @@ define i64 @constant_folded_previous_value() {
 ; SINK-AFTER: middle.block:
 ; SINK-AFTER-NEXT: br label [[FOR_END:%.*]]
 ; SINK-AFTER: scalar.ph:
-; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
 ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
 ; SINK-AFTER: scalar.body:
-; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1
 ; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 77b91ccb913cf..d36b849589513 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -5834,12 +5834,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; CHECK-NEXT: br label [[EXIT:%.*]]
 ; CHECK: scalar.ph:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ]
 ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4
 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]]
@@ -5956,12 +5955,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]]
 ; UNROLL-NO-IC: scalar.ph:
 ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
 ; UNROLL-NO-IC: loop:
 ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
 ; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
 ; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4
 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]]
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
index 8a4820949af12..2384c5d4c9c90 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
@@ -36,12 +36,11 @@ define void @i65_induction_with_negative_step(ptr %dst) {
 ; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i65 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[IV_I65:%.*]] = phi i65 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_I65_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[TRUNC]] = trunc i65 [[IV_I65]] to i64
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TRUNC]]
 ; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
index 651210df823dd..7ffa8c07b8825 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
@@ -29,11 +29,10 @@ define void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr n
 ; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i64 [[IV2]], 1
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[OR_1]]
 ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[GEP1]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
index 6bc2f389d65ce..4244ef690ac0c 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
@@ -36,8 +36,7 @@ define void @foo(i32 %n) {
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]]
 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32>
 ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: br label %[[FOR_BODY31:.*]]
 ; CHECK: [[FOR_BODY31]]:
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll
index 2b21eb21bb947..0556750d5522d 100644
--- a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll
@@ -18,7 +18,6 @@ define i16 @duplicate_lcssa(i16 %val) {
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
 ; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[SCALAR_PH]]:
@@ -32,7 +31,7 @@ define i16 @duplicate_lcssa(i16 %val) {
 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i16 [[IV_NEXT]], 0
 ; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[LCSSA_1:%.*]] = phi i16 [ [[RES]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[LCSSA_1:%.*]] = phi i16 [ [[RES]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI1]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: [[LCSSA_2:%.*]] = phi i16 [ [[RES]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI1]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i16 [[LCSSA_2]]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
index b6c72056b0c5c..611e6c184625c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
@@ -24,11 +24,10 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
 ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_2_PREHEADER:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[LOOP_1:%.*]]
 ; CHECK: loop.1:
-; CHECK-NEXT: [[IV761:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT77:%.*]], [[LOOP_1]] ]
+; CHECK-NEXT: [[IV761:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_NEXT77:%.*]], [[LOOP_1]] ]
 ; CHECK-NEXT: [[IV4:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_1]] ]
 ; CHECK-NEXT: [[IV_NEXT77]] = add i64 [[IV761]], 1
 ; CHECK-NEXT: [[ARRAYIDX_I_I50:%.*]] = getelementptr i32, ptr [[TMP0:%.*]], i64 [[IV76:%.*]]
@@ -49,20 +48,20 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]]
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH5]], label [[VECTOR_PH6:%.*]]
-; CHECK: vector.ph6:
+; CHECK: vector.ph5:
 ; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[TMP3]], 4
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF8]]
 ; CHECK-NEXT: br label [[VECTOR_BODY10:%.*]]
-; CHECK: vector.body9:
+; CHECK: vector.body8:
 ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ 0, [[VECTOR_PH6]] ], [ [[INDEX_NEXT13:%.*]], [[VECTOR_BODY10]] ]
-; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !4, !noalias !7
+; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX12]], 4
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK12:%.*]], label [[VECTOR_BODY10]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK: middle.block12:
+; CHECK: middle.block11:
 ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N10]], label [[LOOP_3_LR_PH:%.*]], label [[SCALAR_PH5]]
-; CHECK: scalar.ph4:
+; CHECK: scalar.ph3:
 ; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK12]] ], [ 0, [[LOOP_2_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT: br label [[LOOP_2:%.*]]
 ; CHECK: loop.3.lr.ph:
@@ -70,7 +69,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
 ; CHECK-NEXT: [[ARRAYIDX_I_I62:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[IDXPROM_I_I61]]
 ; CHECK-NEXT: [[MIN_ITERS_CHECK22:%.*]] = icmp ult i64 [[TMP3]], 4
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK22]], label [[SCALAR_PH21:%.*]], label [[VECTOR_MEMCHECK15:%.*]]
-; CHECK: vector.memcheck15:
+; CHECK: vector.memcheck14:
 ; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[IDXPROM_I_I61]], 2
 ; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 4
@@ -79,20 +78,20 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
 ; CHECK-NEXT: [[BOUND118:%.*]] = icmp ult ptr [[ARRAYIDX_I_I62]], [[SCEVGEP15]]
 ; CHECK-NEXT: [[FOUND_CONFLICT19:%.*]] = and i1 [[BOUND017]], [[BOUND118]]
 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT19]], label [[SCALAR_PH21]], label [[VECTOR_PH24:%.*]]
-; CHECK: vector.ph23:
+; CHECK: vector.ph22:
 ; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP3]], 4
 ; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF24]]
 ; CHECK-NEXT: br label [[VECTOR_BODY27:%.*]]
-; CHECK: vector.body26:
+; CHECK: vector.body25:
 ; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ 0, [[VECTOR_PH24]] ], [ [[INDEX_NEXT29:%.*]], [[VECTOR_BODY27]] ]
 ; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]]
 ; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX29]], 4
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC25]]
 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK29:%.*]], label [[VECTOR_BODY27]], !llvm.loop [[LOOP15:![0-9]+]]
-; CHECK: middle.block29:
+; CHECK: middle.block28:
 ; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC25]]
 ; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH21]]
-; CHECK: scalar.ph21:
+; CHECK: scalar.ph20:
 ; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK29]] ], [ 0, [[LOOP_3_LR_PH]] ], [ 0, [[VECTOR_MEMCHECK15]] ]
 ; CHECK-NEXT: br label [[LOOP_3:%.*]]
 ; CHECK: loop.2:
diff --git a/llvm/test/Transforms/LoopVectorize/pseudoprobe.ll b/llvm/test/Transforms/LoopVectorize/pseudoprobe.ll
index 42f1056beecf9..1054e8534c0b6 100644
--- a/llvm/test/Transforms/LoopVectorize/pseudoprobe.ll
+++ b/llvm/test/Transforms/LoopVectorize/pseudoprobe.ll
@@ -25,12 +25,11 @@ for.end: ; preds = %for.body
 ret i32 0
 }
-
 ; CHECK-LABEL: @test1
 ; CHECK: vector.body:
 ; CHECK: load <4 x float>, ptr %{{.*}}
-; CHECK: store <4 x i32> %{{.*}}, ptr %{{.*}}
-; CHECK-COUNT-4: call void @llvm.pseudoprobe(i64 3666282617048535130, i64 2, i32 0, i64 -1)
+; CHECK: store <4 x i32> {{.*}}, ptr %{{.*}}
+; CHECK: call void @llvm.pseudoprobe(i64 3666282617048535130, i64 2, i32 0, i64 -1)
 ; CHECK: %index.next = add nuw i64 %index, 4
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index cea16c9eb7513..7ba11eb788a61 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -40,12 +40,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
 ; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[STARTVAL]], %[[ENTRY]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[ADD_I]] = add i64 [[ADD_I7]], -1
 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]]
 ; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
@@ -111,12 +110,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
 ; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ [[STARTVAL]], %[[ENTRY]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[ADD_I]] = add i128 [[ADD_I7]], -1
 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]]
 ; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
@@ -192,12 +190,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
 ; CHECK: [[SCALAR_PH]]:
 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[STARTVAL]], %[[ENTRY]] ], [ [[STARTVAL]], %[[VECTOR_SCEVCHECK]] ]
 ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
 ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
 ; CHECK: [[FOR_BODY]]:
 ; CHECK-NEXT: [[ADD_I7:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT: [[ADD_I]] = add i16 [[ADD_I7]], -1
 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]]
 ; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 842ff910c89d3..640e8b976dbe9 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -102,8 +102,7 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [32 x i16], ptr @src, i16 0, i16 [[TMP1]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP3]], <2 x i1> zeroinitializer
 ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> [[WIDE_LOAD]], <2 x i16> splat (i16 1)
 ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]]
@@ -183,10 +182,10 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND2]], <2 x i16> [[VEC_IND3]]
 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP2]]
 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
@@ -199,8 +198,8 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
 ; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
 ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2)
+; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i16> [[VEC_IND2]], splat (i16 2)
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
@@ -290,8 +289,7 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK: pred.load.continue2:
 ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
 ; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> splat (i16 1)
 ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
index ef6ce08da5230..032b74a0a62cb 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
@@ -111,18 +111,17 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2: vector.body:
 ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP1:%.*]] = udiv i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
-; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; VF2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
-; VF2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[TMP0]]
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
+; VF2-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
+; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VF2: middle.block:
 ; VF2-NEXT: br label [[EXIT:%.*]]
 ; VF2: scalar.ph: