diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 00cdb66d8b779..94b9fe9581264 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2764,8 +2764,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) { bool LoopAccessInfo::isInvariant(Value *V) const { auto *SE = PSE->getSE(); - // TODO: Is this really what we want? Even without FP SCEV, we may want some - // trivially loop-invariant FP values to be considered invariant. + if (TheLoop->isLoopInvariant(V)) + return true; if (!SE->isSCEVable(V->getType())) return false; const SCEV *S = SE->getSCEV(V); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e14f985efd96a..dffc000fbd338 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3107,14 +3107,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const { // is correct. The easiest form of the later is to require that all values // stored are the same. return !(Legal->isInvariant(getLoadStorePointerOperand(I)) && - TheLoop->isLoopInvariant(cast(I)->getValueOperand())); + Legal->isInvariant(cast(I)->getValueOperand())); } case Instruction::UDiv: case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: // If the divisor is loop-invariant no predication is needed. - return !TheLoop->isLoopInvariant(I->getOperand(1)); + return !Legal->isInvariant(I->getOperand(1)); } } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 51a8b451dffd9..a1201dcfbdf57 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -17,126 +17,16 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32> ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9) ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]] -; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP8]], i32 0 -; CHECK-NEXT: store i8 [[TMP19]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[VECTOR_BODY]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[CMP_N:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] -; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP8]], i32 1 -; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[SCALAR_PH]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP8]], i32 2 -; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] -; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP8]], i32 3 -; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]] -; CHECK: pred.store.continue8: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4 -; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP8]], i32 4 -; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] -; CHECK: pred.store.continue10: -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5 -; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] -; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP8]], i32 5 -; CHECK-NEXT: store i8 [[TMP20]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue12: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] -; CHECK: pred.store.if13: -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP8]], i32 6 -; CHECK-NEXT: store i8 [[TMP22]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] -; CHECK: pred.store.continue14: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7 -; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] -; CHECK: pred.store.if15: -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP8]], i32 7 -; CHECK-NEXT: store i8 [[TMP24]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.continue16: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8 -; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] -; CHECK: pred.store.if17: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP8]], i32 8 -; CHECK-NEXT: store i8 [[TMP26]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] -; CHECK: pred.store.continue18: -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9 -; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] -; CHECK: pred.store.if19: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP8]], i32 9 -; CHECK-NEXT: store i8 [[TMP28]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] -; CHECK: pred.store.continue20: -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] -; CHECK: pred.store.if21: -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP8]], i32 10 -; CHECK-NEXT: store i8 [[TMP30]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] -; CHECK: pred.store.continue22: -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11 -; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] -; CHECK: pred.store.if23: -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP8]], i32 11 -; CHECK-NEXT: store i8 [[TMP32]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] -; CHECK: pred.store.continue24: -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12 -; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] -; CHECK: pred.store.if25: -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP8]], i32 12 -; CHECK-NEXT: store i8 [[TMP34]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] -; CHECK: pred.store.continue26: -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13 -; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] -; CHECK: pred.store.if27: -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP8]], i32 13 -; CHECK-NEXT: store i8 [[TMP36]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] -; CHECK: pred.store.continue28: -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14 -; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] -; CHECK: pred.store.if29: -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP8]], i32 14 -; CHECK-NEXT: store i8 [[TMP38]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] -; CHECK: pred.store.continue30: -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.if31: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15 ; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue32: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll new file mode 100644 index 0000000000000..0a975108edeed --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s + +define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) { +; CHECK-LABEL: define void @loop_invariant_store( +; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 48) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[TMP0]], splat (i64 52) +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3 +; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2 +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48 +; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52 +; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32 +; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]] +; CHECK: [[COND_FALSE]]: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], %[[LOOP_HEADER]] ], [ [[ZEXT]], %[[COND_FALSE]] ] +; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8 +; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.next = add i32 %iv, 1 + %cmp.slt = icmp slt i32 %iv, 2 + %shl = shl i64 %a, 48 + %ashr = ashr i64 %shl, 52 + %trunc.i32 = trunc i64 %ashr to i32 + br i1 %cmp.slt, label %cond.false, label %loop.latch + +cond.false: ; preds = %loop.header + %zext = zext i8 %b to i32 + br label %loop.latch + +loop.latch: ; preds = %cond.false, %loop.header + %cond = phi i32 [ %trunc.i32, %loop.header ], [ %zext, %cond.false ] + %shl.i32 = shl i32 %cond, 8 + %trunc = trunc i32 %shl.i32 to i8 + store i8 %trunc, ptr %p, align 1 + %exitcond = icmp slt i32 %iv, 8 + br i1 %exitcond, label %loop.header, label %exit + +exit: ; preds = %loop.latch + ret void +} + +define void @loop_invariant_srem(ptr %p, i64 %a, i8 %b) { +; CHECK-LABEL: define void @loop_invariant_srem( +; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 48) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[TMP0]], splat (i64 52) +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE10:.*]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE10]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IND:%.*]] = add <4 x i32> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i8> [[VEC_IND1]], splat (i8 2) +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = srem <4 x i8> [[VEC_IND1]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP13]] +; CHECK-NEXT: store i32 4, ptr [[TMP12]], align 4 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; CHECK: [[PRED_STORE_IF5]]: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP16]] +; CHECK-NEXT: store i32 4, ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; CHECK: [[PRED_STORE_CONTINUE6]]: +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; CHECK: [[PRED_STORE_IF7]]: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i8> [[TMP11]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP20]] +; CHECK-NEXT: store i32 4, ptr [[TMP19]], align 4 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; CHECK: [[PRED_STORE_CONTINUE8]]: +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10]] +; CHECK: [[PRED_STORE_IF9]]: +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i8> [[TMP11]], i32 3 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP21]] +; CHECK-NEXT: store i32 4, ptr [[TMP23]], align 4 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; CHECK: [[PRED_STORE_CONTINUE10]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND1]], splat (i8 4) +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i8 [[IV]], 2 +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48 +; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52 +; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32 +; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]] +; CHECK: [[COND_FALSE]]: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], %[[LOOP_HEADER]] ], [ [[ZEXT]], %[[COND_FALSE]] ] +; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8 +; CHECK-NEXT: [[REM:%.*]] = srem i8 [[IV]], [[TRUNC]] +; CHECK-NEXT: [[GEP_P_REM:%.*]] = getelementptr i32, ptr [[P]], i8 [[REM]] +; CHECK-NEXT: store i32 4, ptr [[GEP_P_REM]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV]], 8 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.next = add i8 %iv, 1 + %cmp.slt = icmp slt i8 %iv, 2 + %shl = shl i64 %a, 48 + %ashr = ashr i64 %shl, 52 + %trunc.i32 = trunc i64 %ashr to i32 + br i1 %cmp.slt, label %cond.false, label %loop.latch + +cond.false: ; preds = %loop.header + %zext = zext i8 %b to i32 + br label %loop.latch + +loop.latch: ; preds = %cond.false, %loop.header + %cond = phi i32 [ %trunc.i32, %loop.header ], [ %zext, %cond.false ] + %shl.i32 = shl i32 %cond, 8 + %trunc = trunc i32 %shl.i32 to i8 + %rem = srem i8 %iv, %trunc + %gep.p.rem = getelementptr i32, ptr %p, i8 %rem + store i32 4, ptr %gep.p.rem + %ec = icmp eq i8 %iv, 8 + br i1 %ec, label %exit, label %loop.header + +exit: ; preds = %loop.latch + ret void +} + +define void @loop_invariant_float_store(ptr %p, i32 %a) { +; CHECK-LABEL: define void @loop_invariant_float_store( +; CHECK-SAME: ptr [[P:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP10:%.*]] = sitofp i32 [[A]] to float +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: store float [[TMP10]], ptr [[P]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2 +; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]] +; CHECK: [[COND_FALSE]]: +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: store float [[TMP10]], ptr [[P]], align 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[IV]], 8 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %a.conv = sitofp i32 %a to float + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.next = add i32 %iv, 1 + %cmp.slt = icmp slt i32 %iv, 2 + br i1 %cmp.slt, label %cond.false, label %loop.latch + +cond.false: ; preds = %loop.header + br label %loop.latch + +loop.latch: ; preds = %cond.false, %loop.header + store float %a.conv, ptr %p + %exitcond = icmp slt i32 %iv, 8 + br i1 %exitcond, label %loop.header, label %exit + +exit: ; preds = %loop.latch + ret void +}