llvm
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlan.cpp
Lines changed: 32 additions & 0 deletions b/‎llvm/lib/Transforms/Vectorize/VPlan.cpp
Lines changed: 32 additions & 0 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Lines changed: 44 additions & 0 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Lines changed: 44 additions & 0 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Lines changed: 4 additions & 0 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanUtils.h
Lines changed: 10 additions & 0 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanUtils.h
Lines changed: 10 additions & 0 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanValue.h
Lines changed: 7 additions & 0 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanValue.h
Lines changed: 7 additions & 0 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
Lines changed: 2 additions & 3 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
Lines changed: 2 additions & 3 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
Lines changed: 71 additions & 34 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
Lines changed: 71 additions & 34 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
Lines changed: 1 addition & 2 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
Lines changed: 1 addition & 2 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
Lines changed: 1 addition & 2 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
Lines changed: 1 addition & 2 deletions
@@ -7308,6 +7308,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   VPlanTransforms::narrowInterleaveGroups(
       BestVPlan, BestVF,
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
+  VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
   VPlanTransforms::removeDeadRecipes(BestVPlan);
 
   VPlanTransforms::convertToConcreteRecipes(BestVPlan,
 
@@ -122,6 +122,38 @@ void VPDef::dump() const {
 }
 #endif
 
+/// Return true if this value and \p Other can be treated as computing the
+/// same value, so that one may stand in for the other.
+///
+/// Both values must have the same VPValue ID, must agree on whether they are
+/// defined by a recipe, and must agree on whether they wrap an underlying IR
+/// value. On top of that:
+///  * values wrapping an IR instruction require the two instructions to be
+///    identical according to Instruction::isIdenticalTo();
+///  * recipe-defined values require the defining recipes to be of the same
+///    kind, to carry the same embedded opcode (if any) and to have the same
+///    VPlan operands;
+///  * values without a defining recipe must wrap the very same IR value.
+///
+/// NOTE(review): recipe-level state that is neither an operand nor part of
+/// the underlying instruction (e.g. a predicate or flags held only by the
+/// recipe) is not compared here - confirm whether callers need that.
+bool VPValue::isIdenticalTo(const VPValue *Other) const {
+  if (this == Other)
+    return true;
+  if (getVPValueID() != Other->getVPValueID() ||
+      hasDefiningRecipe() != Other->hasDefiningRecipe() ||
+      !getUnderlyingValue() != !Other->getUnderlyingValue())
+    return false;
+
+  const auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+  const auto *OtherI =
+      dyn_cast_or_null<Instruction>(Other->getUnderlyingValue());
+  // A value backed by an instruction never matches one backed by another kind
+  // of IR value. hash_value() hashes those two kinds differently, so treating
+  // them as equal would break the "equal implies same hash" contract.
+  if (!I != !OtherI)
+    return false;
+  // Matching opcode and operands is not sufficient: instructions also carry
+  // state such as extractvalue indices, compare predicates and flags.
+  // Ignoring it would e.g. merge `extractvalue %sv, 0` with
+  // `extractvalue %sv, 1`.
+  if (I && !I->isIdenticalTo(OtherI))
+    return false;
+
+  // Without a defining recipe there is nothing left to compare structurally;
+  // only the very same IR value is interchangeable.
+  if (!hasDefiningRecipe())
+    return getUnderlyingValue() == Other->getUnderlyingValue();
+
+  // The VPlan operands may differ from the operands of the underlying IR
+  // instruction, and recipes of different kinds may both report no opcode, so
+  // always compare the recipes themselves as well.
+  const VPRecipeBase *DefL = getDefiningRecipe();
+  const VPRecipeBase *DefR = Other->getDefiningRecipe();
+  return DefL->getVPDefID() == DefR->getVPDefID() &&
+         vputils::getOpcode(*DefL) == vputils::getOpcode(*DefR) &&
+         equal(DefL->operands(), DefR->operands());
+}
+
+/// Compute a hash for \p V from what it computes instead of from its address.
+///
+/// Three cases are distinguished, in this order:
+///  * \p V wraps an IR instruction: hash that instruction's opcode and
+///    operand values;
+///  * \p V has no defining recipe: hash its VPValue ID together with the
+///    (possibly null) underlying IR value;
+///  * otherwise: hash the opcode embedded in the defining recipe (if any) and
+///    the recipe's operands.
+hash_code llvm::hash_value(const VPValue &V) {
+  Value *Underlying = V.getUnderlyingValue();
+  if (auto *UI = dyn_cast_or_null<Instruction>(Underlying)) {
+    hash_code OperandsHash = hash_combine_range(UI->operand_values());
+    return hash_combine(UI->getOpcode(), OperandsHash);
+  }
+
+  if (!V.hasDefiningRecipe())
+    return hash_combine(V.getVPValueID(), Underlying);
+
+  const VPRecipeBase *Recipe = V.getDefiningRecipe();
+  hash_code OperandsHash = hash_combine_range(Recipe->operands());
+  return hash_combine(vputils::getOpcode(*Recipe), OperandsHash);
+}
+
 VPRecipeBase *VPValue::getDefiningRecipe() {
   return cast_or_null<VPRecipeBase>(Def);
 }
 
@@ -1755,6 +1755,50 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
   }
 }
 
+namespace {
+/// DenseMap traits for VPSingleDefRecipe pointers that key on the recipe a
+/// pointer refers to (through hash_value() and isIdenticalTo()) instead of on
+/// the pointer value itself.
+struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
+  static unsigned getHashValue(const VPSingleDefRecipe *R) {
+    return hash_value(*R);
+  }
+
+  static bool isEqual(const VPSingleDefRecipe *LHS,
+                      const VPSingleDefRecipe *RHS) {
+    // The empty and tombstone keys do not point at real recipes and must not
+    // be dereferenced; they only ever compare equal to themselves.
+    const auto IsSentinel = [](const VPSingleDefRecipe *Key) {
+      return Key == getEmptyKey() || Key == getTombstoneKey();
+    };
+    if (IsSentinel(LHS) || IsSentinel(RHS))
+      return LHS == RHS;
+    return LHS->isIdenticalTo(RHS);
+  }
+};
+} // end anonymous namespace
+
+/// Perform common-subexpression-elimination of VPSingleDefRecipes on \p Plan.
+///
+/// A recipe is replaced by an earlier recipe of the same VPBasicBlock when
+/// CSEDenseMapInfo considers the two identical and VPTypeAnalysis (seeded
+/// with \p CanonicalIVTy) infers the same scalar type for both. The replaced
+/// recipe has all its uses redirected and is erased.
+///
+/// Recipes are never merged when
+///  * their underlying IR instruction may access memory or have side effects,
+///    because executing it once is not equivalent to executing it twice, or
+///  * the earlier recipe lives in a different block, because nothing here
+///    establishes that it dominates the later one.
+///    TODO: Allow cross-block replacement when dominance is proven.
+void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
+  DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
+  VPTypeAnalysis TypeInfo(&CanonicalIVTy);
+  // There is existing logic to sink instructions into replicate regions, and
+  // we'd be undoing that work if we went through replicate regions. Hence,
+  // don't CSE in replicate regions.
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getEntry()))) {
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
+      if (!Def)
+        continue;
+
+      // Two textually identical loads, stores or calls are not redundant if
+      // memory may change between them or if they have side effects.
+      if (auto *UI = dyn_cast_or_null<Instruction>(Def->getUnderlyingValue());
+          UI && (UI->mayReadOrWriteMemory() || UI->mayHaveSideEffects()))
+        continue;
+
+      // Single lookup: either Def becomes the representative of its
+      // equivalence class, or we get the existing representative back.
+      auto [It, Inserted] = CSEMap.try_emplace(Def, Def);
+      if (Inserted)
+        continue;
+
+      VPSingleDefRecipe *V = It->second;
+      if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
+        continue;
+
+      // Blocks are visited in depth-first order, which does not imply that an
+      // earlier-visited block dominates the current one. Only reuse a recipe
+      // from the current block (it necessarily precedes Def) and make Def the
+      // representative for the remainder of this block otherwise.
+      if (V->getParent() != VPBB) {
+        It->second = Def;
+        continue;
+      }
+
+      Def->replaceAllUsesWith(V);
+      Def->eraseFromParent();
+    }
+  }
+}
+
 /// Move loop-invariant recipes out of the vector loop region in \p Plan.
 static void licm(VPlan &Plan) {
   VPBasicBlock *Preheader = Plan.getVectorPreheader();
 
@@ -240,6 +240,10 @@ struct VPlanTransforms {
   /// removing dead edges to their successors.
   static void removeBranchOnConst(VPlan &Plan);
 
+  /// Perform common-subexpression-elimination on \p Plan: a
+  /// VPSingleDefRecipe that is identical to an earlier one has its uses
+  /// redirected to the earlier recipe and is erased. \p CanonicalIVType is
+  /// used to infer the scalar types of candidate recipes, which must agree
+  /// for a replacement to happen. This is a VPlan-to-VPlan transform that is
+  /// run on the plan before it is converted to concrete recipes.
+  static void cse(VPlan &Plan, Type &CanonicalIVType);
+
   /// If there's a single exit block, optimize its phi recipes that use exiting
   /// IV values by feeding them precomputed end values instead, possibly taken
   /// one step backwards.
 
@@ -10,6 +10,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
 
 #include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
 
 namespace llvm {
 class ScalarEvolution;
@@ -37,6 +38,15 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
 /// SCEV expression could be constructed.
 const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
 
+/// Return the instruction opcode stored in recipe \p R, or std::nullopt when
+/// \p R is not one of the recipe kinds listed below.
+inline std::optional<unsigned> getOpcode(const VPRecipeBase &R) {
+  using OptionalOpcode = std::optional<unsigned>;
+  return TypeSwitch<const VPRecipeBase *, OptionalOpcode>(&R)
+      .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+            VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
+            VPReplicateRecipe>([](auto *Recipe) -> OptionalOpcode {
+        return Recipe->getOpcode();
+      })
+      .Default([](auto *) -> OptionalOpcode { return std::nullopt; });
+}
+
 /// Returns true if \p VPV is a single scalar, either because it produces the
 /// same value for all lanes or only has its first lane used.
 inline bool isSingleScalar(const VPValue *VPV) {
 
@@ -185,8 +185,15 @@ class LLVM_ABI_FOR_TEST VPValue {
     assert(!UnderlyingVal && "Underlying Value is already set.");
     UnderlyingVal = Val;
   }
+
+  /// Structural (rather than pointer) equality. Returns true if \p Other has
+  /// the same VPValue ID as this value and is defined equivalently, as
+  /// determined from the underlying IR values or from the defining recipes'
+  /// opcodes and operands.
+  bool isIdenticalTo(const VPValue *Other) const;
 };
 
+/// Hash a VPValue by what it computes (its underlying IR instruction, or the
+/// opcode and operands of its defining recipe) rather than by its address, so
+/// that equivalent VPValues can be de-duplicated in hashed containers.
+hash_code hash_value(const VPValue &Arg);
+
 typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
 typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
 
 
@@ -330,11 +330,10 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
 ; CHECK-NEXT:    [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]]
 ; CHECK-NEXT:    br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT]], [[LOOP]] ]
 ; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_1]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[IV_2]], 10
 
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
 ; REQUIRES: asserts
 
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s
@@ -22,23 +23,42 @@
 
 ; Check that the extractvalue operands are actually free in vector code.
 
-; FORCED:         [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
-; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT:    [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
-; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
-; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT:    [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
-
-; FORCED-LABEL: vector.body:                                      ; preds = %vector.body, %vector.ph
-; FORCED-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; FORCED-NEXT:    [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 %index
-; FORCED-NEXT:    store <2 x i64> [[ADD]], ptr [[GEP]], align 4
-; FORCED-NEXT:    %index.next = add nuw i32 %index, 2
-; FORCED-NEXT:    [[C:%.+]] = icmp eq i32 %index.next, 1000
-; FORCED-NEXT:    br i1 [[C]], label %middle.block, label %vector.body
-
 define void @test1(ptr %dst, {i64, i64} %sv) {
+; FORCED-LABEL: define void @test1(
+; FORCED-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) {
+; FORCED-NEXT:  [[ENTRY:.*]]:
+; FORCED-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; FORCED:       [[VECTOR_PH]]:
+; FORCED-NEXT:    [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
+; FORCED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+; FORCED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT]]
+; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
+; FORCED:       [[VECTOR_BODY]]:
+; FORCED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; FORCED-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
+; FORCED-NEXT:    store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
+; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; FORCED-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; FORCED-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; FORCED:       [[MIDDLE_BLOCK]]:
+; FORCED-NEXT:    br label %[[EXIT:.*]]
+; FORCED:       [[SCALAR_PH]]:
+; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; FORCED-NEXT:    br label %[[LOOP_BODY:.*]]
+; FORCED:       [[LOOP_BODY]]:
+; FORCED-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
+; FORCED-NEXT:    [[A:%.*]] = extractvalue { i64, i64 } [[SV]], 0
+; FORCED-NEXT:    [[B:%.*]] = extractvalue { i64, i64 } [[SV]], 1
+; FORCED-NEXT:    [[ADDR:%.*]] = getelementptr i64, ptr [[DST]], i32 [[IV]]
+; FORCED-NEXT:    [[ADD:%.*]] = add i64 [[A]], [[B]]
+; FORCED-NEXT:    store i64 [[ADD]], ptr [[ADDR]], align 4
+; FORCED-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; FORCED-NEXT:    [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
+; FORCED-NEXT:    br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; FORCED:       [[EXIT]]:
+; FORCED-NEXT:    ret void
+;
 entry:
   br label %loop.body
 
@@ -70,25 +90,42 @@ declare float @powf(float, float) readnone nounwind
 
 ; CM: LV: Scalar loop costs: 14.
 
-; FORCED-LABEL: define void @test_getVectorCallCost
-
-; FORCED:         [[E1:%.+]] = extractvalue { float, float } %sv, 0
-; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
-; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT:    [[E2:%.+]] = extractvalue { float, float } %sv, 1
-; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
-; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
-
-; FORCED-LABEL: vector.body:                                      ; preds = %vector.body, %vector.ph
-; FORCED-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; FORCED-NEXT:    [[GEP1:%.+]] = getelementptr float, ptr %dst, i32 %index
-; FORCED-NEXT:    [[POW:%.+]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2)
-; FORCED-NEXT:    store <2 x float> [[POW]], ptr [[GEP1]], align 4
-; FORCED-NEXT:    %index.next = add nuw i32 %index, 2
-; FORCED-NEXT:    [[C:%.+]] = icmp eq i32 %index.next, 1000
-; FORCED-NEXT:    br i1 [[C]], label %middle.block, label %vector.body
-
 define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
+; FORCED-LABEL: define void @test_getVectorCallCost(
+; FORCED-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) {
+; FORCED-NEXT:  [[ENTRY:.*]]:
+; FORCED-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; FORCED:       [[VECTOR_PH]]:
+; FORCED-NEXT:    [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0
+; FORCED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; FORCED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
+; FORCED:       [[VECTOR_BODY]]:
+; FORCED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; FORCED-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
+; FORCED-NEXT:    [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT]])
+; FORCED-NEXT:    store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
+; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; FORCED-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; FORCED-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; FORCED:       [[MIDDLE_BLOCK]]:
+; FORCED-NEXT:    br label %[[EXIT:.*]]
+; FORCED:       [[SCALAR_PH]]:
+; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; FORCED-NEXT:    br label %[[LOOP_BODY:.*]]
+; FORCED:       [[LOOP_BODY]]:
+; FORCED-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
+; FORCED-NEXT:    [[A:%.*]] = extractvalue { float, float } [[SV]], 0
+; FORCED-NEXT:    [[B:%.*]] = extractvalue { float, float } [[SV]], 1
+; FORCED-NEXT:    [[ADDR:%.*]] = getelementptr float, ptr [[DST]], i32 [[IV]]
+; FORCED-NEXT:    [[P:%.*]] = call float @powf(float [[A]], float [[B]])
+; FORCED-NEXT:    store float [[P]], ptr [[ADDR]], align 4
+; FORCED-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; FORCED-NEXT:    [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
+; FORCED-NEXT:    br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; FORCED:       [[EXIT]]:
+; FORCED-NEXT:    ret void
+;
 entry:
   br label %loop.body
 
 
@@ -23,12 +23,11 @@ define double @test_reduction_costs() {
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP_1:.*]]
 ; CHECK:       [[LOOP_1]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
 ; CHECK-NEXT:    [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
-; CHECK-NEXT:    [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
+; CHECK-NEXT:    [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
 ; CHECK-NEXT:    [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
 ; CHECK-NEXT:    [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 
@@ -294,7 +294,6 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
-; CHECK-NEXT:    [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -321,7 +320,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop: