Skip to content

Commit ed150a7

Browse files
matborzyszkowski authored and igcbot committed
Fix replacing memset intrinsic for opaque pointers
Fix replacing the memset intrinsic for opaque pointers. Add alloca instructions and global variables to the mechanism that recognizes the type for opaque pointers.
1 parent 8e4da30 commit ed150a7

File tree

4 files changed

+163
-64
lines changed

4 files changed

+163
-64
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -755,15 +755,18 @@ void ReplaceUnsupportedIntrinsics::replaceMemset(IntrinsicInst *I) {
755755
// For typed pointers we can get a type from more complex type
756756
// like e.g. struct by using GetBaseType(), but for opaque pointers
757757
// we also need to be able to deduce this type, so we can get this
758-
// from investigating GEP instruction and then using GetBaseType().
759-
Type *RawDstType = nullptr;
758+
// from investigating instructions and then using GetBaseType().
759+
Type *RawDstType = Builder.getInt8Ty();
760760
if (IGCLLVM::isOpaquePointerTy(ptrTy)) {
761-
if (GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(Dst))
761+
if (auto *alloca = dyn_cast<AllocaInst>(Dst))
762+
RawDstType = alloca->getAllocatedType();
763+
else if (auto *gep = dyn_cast<GetElementPtrInst>(Dst))
762764
RawDstType = gep->getResultElementType();
765+
else if (auto *gv = dyn_cast<GlobalVariable>(Dst))
766+
RawDstType = gv->getValueType();
763767
} else {
764768
RawDstType = IGCLLVM::getNonOpaquePtrEltTy(ptrTy);
765769
}
766-
IGC_ASSERT_MESSAGE(RawDstType, "Unexpected type for RawDstType!");
767770

768771
if (Type *BaseType = GetBaseType(RawDstType))
769772
BaseSize = BaseType->getScalarSizeInBits();
Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --typed-pointers -igc-replace-unsupported-intrinsics -verify -S %s -o %t
10+
; RUN: FileCheck %s < %t
11+
12+
; ModuleID = 'memset_kernels'
13+
source_filename = "memset_kernels.ll"
14+
15+
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
16+
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg)
17+
18+
%"class::buf" = type { [8 x i16] }
19+
20+
define void @kernel_gep(%"class::buf" addrspace(1)* %0) {
21+
entry:
22+
; CHECK-LABEL: define void @kernel_gep
23+
; CHECK: [[BCAST:%.*]] = bitcast %"class::buf" addrspace(1)* [[GEP:%.*]] to <16 x i16> addrspace(1)*
24+
; CHECK: [[GEP0:%.*]] = getelementptr <16 x i16>, <16 x i16> addrspace(1)* [[BCAST]], i32 0
25+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16> addrspace(1)* [[GEP0]], align 16
26+
; CHECK: [[GEP1:%.*]] = getelementptr <16 x i16>, <16 x i16> addrspace(1)* [[BCAST]], i32 1
27+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16> addrspace(1)* [[GEP1]], align 16
28+
; CHECK: [[GEP2:%.*]] = getelementptr <16 x i16>, <16 x i16> addrspace(1)* [[BCAST]], i32 2
29+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16> addrspace(1)* [[GEP2]], align 16
30+
; CHECK: [[GEP3:%.*]] = getelementptr <16 x i16>, <16 x i16> addrspace(1)* [[BCAST]], i32 3
31+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16> addrspace(1)* [[GEP3]], align 16
32+
; CHECK: [[GEP4:%.*]] = getelementptr <16 x i16>, <16 x i16> addrspace(1)* [[BCAST]], i32 4
33+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16> addrspace(1)* [[GEP4]], align 16
34+
%gep = getelementptr inbounds %"class::buf", %"class::buf" addrspace(1)* %0, i64 0
35+
%bcast = bitcast %"class::buf" addrspace(1)* %gep to i8 addrspace(1)*
36+
call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 16 %bcast, i8 0, i64 160, i1 false)
37+
ret void
38+
}
39+
40+
define void @kernel_alloca() {
41+
entry:
42+
; CHECK-LABEL: define void @kernel_alloca
43+
; CHECK: [[BC:%.*]] = bitcast [32 x i32]* %buf to <8 x i32>*
44+
; CHECK: [[GEP0:%.*]] = getelementptr <8 x i32>, <8 x i32>* [[BC]], i32 0
45+
; CHECK: store <8 x i32> zeroinitializer, <8 x i32>* [[GEP0]], align 4
46+
; CHECK: [[GEP1:%.*]] = getelementptr <8 x i32>, <8 x i32>* [[BC]], i32 1
47+
; CHECK: store <8 x i32> zeroinitializer, <8 x i32>* [[GEP1]], align 4
48+
; CHECK: [[GEP2:%.*]] = getelementptr <8 x i32>, <8 x i32>* [[BC]], i32 2
49+
; CHECK: store <8 x i32> zeroinitializer, <8 x i32>* [[GEP2]], align 4
50+
%buf = alloca [32 x i32], align 2
51+
%ptr = bitcast [32 x i32]* %buf to i8*
52+
call void @llvm.memset.p0i8.i64(i8* align 4 %ptr, i8 0, i64 128, i1 false)
53+
ret void
54+
}
55+
56+
define void @kernel_alloca_2() {
57+
entry:
58+
; CHECK-LABEL: define void @kernel_alloca_2
59+
; CHECK: [[BC:%.*]] = bitcast [32 x i16]* %buf to <16 x i16>*
60+
; CHECK: [[GEP0:%.*]] = getelementptr <16 x i16>, <16 x i16>* [[BC]], i32 0
61+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16>* [[GEP0]], align 4
62+
; CHECK: [[GEP1:%.*]] = getelementptr <16 x i16>, <16 x i16>* [[BC]], i32 1
63+
; CHECK: store <16 x i16> zeroinitializer, <16 x i16>* [[GEP1]], align 4
64+
%buf = alloca [32 x i16], align 2
65+
%ptr = bitcast [32 x i16]* %buf to i8*
66+
call void @llvm.memset.p0i8.i64(i8* align 4 %ptr, i8 0, i64 64, i1 false)
67+
ret void
68+
}
Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-16-plus
10+
11+
; RUN: igc_opt --opaque-pointers -igc-replace-unsupported-intrinsics -verify -S %s -o %t
12+
; RUN: FileCheck %s < %t
13+
14+
; For opaque pointers we need to be able to deduce the type
15+
; and we can find it by investigating instructions
16+
17+
; ModuleID = 'memset_kernels'
18+
source_filename = "memset_kernels.ll"
19+
20+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
21+
declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg)
22+
declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly, i8, i64, i1 immarg)
23+
24+
%"class::buf" = type { [8 x i16] }
25+
26+
@global_val = external addrspace(3) global [0 x i8]
27+
28+
define void @kernel_global() {
29+
entry:
30+
; CHECK-LABEL: define void @kernel_global
31+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) [[GV:@.*]], align 32
32+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr inbounds (<8 x i32>, ptr addrspace(3) [[GV]], i32 1), align 32
33+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 2), align 32
34+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 3), align 32
35+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 4), align 32
36+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 5), align 32
37+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 6), align 32
38+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 7), align 32
39+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 8), align 32
40+
; CHECK: store <8 x i32> zeroinitializer, ptr addrspace(3) getelementptr (<8 x i32>, ptr addrspace(3) [[GV]], i32 9), align 32
41+
call void @llvm.memset.p3.i64(ptr addrspace(3) align 32 @global_val, i8 0, i64 320, i1 false)
42+
ret void;
43+
}
44+
45+
define void @kernel_gep(ptr addrspace(1) %0) {
46+
entry:
47+
; CHECK-LABEL: define void @kernel_gep
48+
; CHECK: [[GEP0:%.*]] = getelementptr <16 x i16>, ptr addrspace(1) [[GEP:%.*]], i32 0
49+
; CHECK: store <16 x i16> zeroinitializer, ptr addrspace(1) [[GEP0]], align 16
50+
; CHECK: [[GEP1:%.*]] = getelementptr <16 x i16>, ptr addrspace(1) [[GEP]], i32 1
51+
; CHECK: store <16 x i16> zeroinitializer, ptr addrspace(1) [[GEP1]], align 16
52+
; CHECK: [[GEP2:%.*]] = getelementptr <16 x i16>, ptr addrspace(1) [[GEP]], i32 2
53+
; CHECK: store <16 x i16> zeroinitializer, ptr addrspace(1) [[GEP2]], align 16
54+
; CHECK: [[GEP3:%.*]] = getelementptr <16 x i16>, ptr addrspace(1) [[GEP]], i32 3
55+
; CHECK: store <16 x i16> zeroinitializer, ptr addrspace(1) [[GEP3]], align 16
56+
%gep = getelementptr inbounds %"class::buf", ptr addrspace(1) %0, i64 0
57+
call void @llvm.memset.p1.i64(ptr addrspace(1) align 16 %gep, i8 0, i64 160, i1 false)
58+
ret void
59+
}
60+
61+
62+
define void @kernel_alloca() {
63+
entry:
64+
; CHECK-LABEL: define void @kernel_alloca
65+
; CHECK: [[BUF:%.*]] = alloca [32 x i32], align 2
66+
; CHECK: [[GEP0:%.*]] = getelementptr <8 x i32>, ptr [[BUF]], i32 0
67+
; CHECK: store <8 x i32> zeroinitializer, ptr [[GEP0]], align 4
68+
; CHECK: [[GEP1:%.*]] = getelementptr <8 x i32>, ptr [[BUF]], i32 1
69+
; CHECK: store <8 x i32> zeroinitializer, ptr [[GEP1]], align 4
70+
; CHECK: [[GEP2:%.*]] = getelementptr <8 x i32>, ptr [[BUF]], i32 2
71+
; CHECK: store <8 x i32> zeroinitializer, ptr [[GEP2]], align 4
72+
%buf = alloca [32 x i32], align 2
73+
call void @llvm.memset.p0.i64(ptr align 4 %buf, i8 0, i64 128, i1 false)
74+
ret void
75+
}
76+
77+
define void @kernel_alloca_2() {
78+
entry:
79+
; CHECK-LABEL: define void @kernel_alloca_2
80+
; CHECK: [[BUF:%.*]] = alloca [32 x i16], align 2
81+
; CHECK: [[GEP0:%.*]] = getelementptr <16 x i16>, ptr [[BUF]], i32 0
82+
; CHECK: store <16 x i16> zeroinitializer, ptr [[GEP0]], align 4
83+
; CHECK: [[GEP1:%.*]] = getelementptr <16 x i16>, ptr [[BUF]], i32 1
84+
; CHECK: store <16 x i16> zeroinitializer, ptr [[GEP1]], align 4
85+
%buf = alloca [32 x i16], align 2
86+
call void @llvm.memset.p0.i64(ptr align 4 %buf, i8 0, i64 64, i1 false)
87+
ret void
88+
}

IGC/Compiler/tests/ReplaceUnsupportedIntrinsics/memset_get_type_from_struct.ll

Lines changed: 0 additions & 60 deletions
This file was deleted.

0 commit comments

Comments
 (0)