diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 70b4552190a4e..f537b0d3fbd75 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1258,8 +1258,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { "Map index doesn't point back to a slice with this user."); } - // Disable SRoA for any intrinsics except for lifetime invariants and - // invariant group. + // Disable SRoA for any intrinsics except for lifetime invariants. // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. void visitIntrinsicInst(IntrinsicInst &II) { @@ -1279,12 +1278,6 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { return; } - if (II.isLaunderOrStripInvariantGroup()) { - insertUse(II, Offset, AllocSize, true); - enqueueUsers(II); - return; - } - Base::visitIntrinsicInst(II); } @@ -3618,8 +3611,7 @@ class AllocaSliceRewriter : public InstVisitor { } bool visitIntrinsicInst(IntrinsicInst &II) { - assert((II.isLifetimeStartOrEnd() || II.isLaunderOrStripInvariantGroup() || - II.isDroppable()) && + assert((II.isLifetimeStartOrEnd() || II.isDroppable()) && "Unexpected intrinsic!"); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); @@ -3633,9 +3625,6 @@ class AllocaSliceRewriter : public InstVisitor { return true; } - if (II.isLaunderOrStripInvariantGroup()) - return true; - assert(II.getArgOperand(1) == OldPtr); // Lifetime intrinsics are only promotable if they cover the whole alloca. // Therefore, we drop lifetime intrinsics which don't cover the whole diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll index 1be6f6e2fc32b..c9c9e031ca95f 100644 --- a/llvm/test/Transforms/SROA/invariant-group.ll +++ b/llvm/test/Transforms/SROA/invariant-group.ll @@ -11,10 +11,17 @@ declare i32 @somevalue() define void @f() { ; CHECK-LABEL: @f( +; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 +; CHECK-NEXT: [[A1_I8_INV:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[A]]) +; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [[T]], ptr [[A]], i32 0, i32 1 ; CHECK-NEXT: [[SV1:%.*]] = call i32 @somevalue() ; CHECK-NEXT: [[SV2:%.*]] = call i32 @somevalue() -; CHECK-NEXT: call void @h(i32 [[SV1]]) -; CHECK-NEXT: call void @h(i32 [[SV2]]) +; CHECK-NEXT: store i32 [[SV1]], ptr [[A1_I8_INV]], align 4, !invariant.group [[META0:![0-9]+]] +; CHECK-NEXT: store i32 [[SV2]], ptr [[A2]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[A1_I8_INV]], align 4, !invariant.group [[META0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[A2]], align 4 +; CHECK-NEXT: call void @h(i32 [[V1]]) +; CHECK-NEXT: call void @h(i32 [[V2]]) ; CHECK-NEXT: ret void ; %a = alloca %t @@ -44,7 +51,7 @@ define void @g() { ; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [[T]], ptr [[A]], i32 0, i32 1 ; CHECK-NEXT: [[SV1:%.*]] = call i32 @somevalue() ; CHECK-NEXT: [[SV2:%.*]] = call i32 @somevalue() -; CHECK-NEXT: store i32 [[SV1]], ptr [[A1_I8_INV]], align 4, !invariant.group [[META0:![0-9]+]] +; CHECK-NEXT: store i32 [[SV1]], ptr [[A1_I8_INV]], align 4, !invariant.group [[META0]] ; CHECK-NEXT: store i32 [[SV2]], ptr [[A2]], align 4 ; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[A1_I8_INV]], align 4, !invariant.group [[META0]] ; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[A2]], align 4 @@ -81,6 +88,9 @@ define void @g() { define void @store_and_launder() { ; CHECK-LABEL: @store_and_launder( +; CHECK-NEXT: [[VALPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 0, ptr [[VALPTR]], align 4 +; CHECK-NEXT: [[BARR:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[VALPTR]]) ; CHECK-NEXT: ret void ; %valptr = alloca i32, align 4 @@ -91,7 +101,10 @@ define void @store_and_launder() { define i32 @launder_and_load() { ; CHECK-LABEL: @launder_and_load( -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: [[VALPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BARR:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[VALPTR]]) +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[VALPTR]], align 4 +; CHECK-NEXT: ret i32 [[V2]] ; %valptr = alloca i32, align 4 %barr = call ptr @llvm.launder.invariant.group.p0(ptr %valptr) @@ -101,6 +114,9 @@ define i32 @launder_and_load() { define void @launder_and_ptr_arith() { ; CHECK-LABEL: @launder_and_ptr_arith( +; CHECK-NEXT: [[VALPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BARR:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[VALPTR]]) +; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i32, ptr [[VALPTR]], i32 0 ; CHECK-NEXT: ret void ; %valptr = alloca i32, align 4 @@ -140,9 +156,13 @@ end: define void @partial_promotion_of_alloca() { ; CHECK-LABEL: @partial_promotion_of_alloca( -; CHECK-NEXT: [[STRUCT_PTR_SROA_2:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store volatile i32 0, ptr [[STRUCT_PTR_SROA_2]], align 4 -; CHECK-NEXT: [[STRUCT_PTR_SROA_2_0_STRUCT_PTR_SROA_2_4_LOAD_VAL:%.*]] = load volatile i32, ptr [[STRUCT_PTR_SROA_2]], align 4 +; CHECK-NEXT: [[STRUCT_PTR:%.*]] = alloca [[T:%.*]], align 4 +; CHECK-NEXT: [[FIELD_PTR:%.*]] = getelementptr inbounds [[T]], ptr [[STRUCT_PTR]], i32 0, i32 0 +; CHECK-NEXT: store i32 0, ptr [[FIELD_PTR]], align 4 +; CHECK-NEXT: [[VOLATILE_FIELD_PTR:%.*]] = getelementptr inbounds [[T]], ptr [[STRUCT_PTR]], i32 0, i32 1 +; CHECK-NEXT: store volatile i32 0, ptr [[VOLATILE_FIELD_PTR]], align 4, !invariant.group [[META0]] +; CHECK-NEXT: [[BARR:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR]]) +; CHECK-NEXT: [[LOAD_VAL:%.*]] = load volatile i32, ptr [[VOLATILE_FIELD_PTR]], align 4, !invariant.group [[META0]] ; CHECK-NEXT: ret void ; %struct_ptr = alloca %t, align 4 @@ -155,6 +175,61 @@ define void @partial_promotion_of_alloca() { ret void } +define void @memcpy_after_laundering_alloca(ptr %ptr) { +; CHECK-LABEL: @memcpy_after_laundering_alloca( +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca { i64, i64 }, align 8 +; CHECK-NEXT: [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 16, i1 false) +; CHECK-NEXT: ret void +; + %alloca = alloca { i64, i64 }, align 8 + %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca) + call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false) + ret void +} + +define void @memcpy_after_laundering_alloca_slices(ptr %ptr) { +; CHECK-LABEL: @memcpy_after_laundering_alloca_slices( +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca { [16 x i8], i64, [16 x i8] }, align 8 +; CHECK-NEXT: [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[LAUNDER]], i64 16 +; CHECK-NEXT: store i64 0, ptr [[GEP]], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 40, i1 false) +; CHECK-NEXT: ret void +; + %alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8 + %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca) + %gep = getelementptr i8, ptr %launder, i64 16 + store i64 0, ptr %gep + call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false) + ret void +} + +define void @test_agg_store() { +; CHECK-LABEL: @test_agg_store( +; CHECK-NEXT: [[STRUCT_PTR:%.*]] = alloca [[T:%.*]], i64 1, align 4 +; CHECK-NEXT: [[STRUCT_PTR_FRESH:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR]]) +; CHECK-NEXT: [[STRUCT:%.*]] = call [[T]] @[[MAKE_T:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() +; CHECK-NEXT: store [[T]] [[STRUCT]], ptr [[STRUCT_PTR_FRESH]], align 4, !invariant.group [[META0]] +; CHECK-NEXT: [[FIRST_PTR:%.*]] = getelementptr [[T]], ptr [[STRUCT_PTR_FRESH]], i32 0, i32 0 +; CHECK-NEXT: [[FIRST:%.*]] = load i32, ptr [[FIRST_PTR]], align 4 +; CHECK-NEXT: [[SECOND_PTR:%.*]] = getelementptr [[T]], ptr [[STRUCT_PTR_FRESH]], i32 0, i32 1 +; CHECK-NEXT: [[SECOND:%.*]] = load i32, ptr [[SECOND_PTR]], align 4 +; CHECK-NEXT: ret void +; + %struct_ptr = alloca %t, i64 1, align 4 + %struct_ptr_fresh = call ptr @llvm.launder.invariant.group.p0(ptr %struct_ptr) + %struct = call %t @make_t() + store %t %struct, ptr %struct_ptr_fresh, align 4, !invariant.group !0 + %first_ptr = getelementptr %t, ptr %struct_ptr_fresh, i32 0, i32 0 + %first = load i32, ptr %first_ptr, align 4 + %second_ptr = getelementptr %t, ptr %struct_ptr_fresh, i32 0, i32 1 + %second = load i32, ptr %second_ptr, align 4 + ret void +} + +declare %t @make_t() + declare void @use(ptr) !0 = !{}