Skip to content

Commit 8c21064

Browse files
authored
[AArch64][SME] Lower memchr to __arm_sc_memchr in streaming[-compatible] functions (#168896)
This allows us to avoid some streaming-mode switches.
1 parent 52efe03 commit 8c21064

File tree

3 files changed

+210
-15
lines changed

3 files changed

+210
-15
lines changed

llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -156,29 +156,37 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(unsigned Opcode, SelectionDAG &DAG,
156156
}
157157

158158
SDValue AArch64SelectionDAGInfo::EmitStreamingCompatibleMemLibCall(
159-
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
159+
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op0, SDValue Op1,
160160
SDValue Size, RTLIB::Libcall LC) const {
161161
const AArch64Subtarget &STI =
162162
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
163163
const AArch64TargetLowering *TLI = STI.getTargetLowering();
164164
TargetLowering::ArgListTy Args;
165-
Args.emplace_back(Dst, PointerType::getUnqual(*DAG.getContext()));
165+
Args.emplace_back(Op0, PointerType::getUnqual(*DAG.getContext()));
166166

167+
bool UsesResult = false;
167168
RTLIB::Libcall NewLC;
168169
switch (LC) {
169170
case RTLIB::MEMCPY: {
170171
NewLC = RTLIB::SC_MEMCPY;
171-
Args.emplace_back(Src, PointerType::getUnqual(*DAG.getContext()));
172+
Args.emplace_back(Op1, PointerType::getUnqual(*DAG.getContext()));
172173
break;
173174
}
174175
case RTLIB::MEMMOVE: {
175176
NewLC = RTLIB::SC_MEMMOVE;
176-
Args.emplace_back(Src, PointerType::getUnqual(*DAG.getContext()));
177+
Args.emplace_back(Op1, PointerType::getUnqual(*DAG.getContext()));
177178
break;
178179
}
179180
case RTLIB::MEMSET: {
180181
NewLC = RTLIB::SC_MEMSET;
181-
Args.emplace_back(DAG.getZExtOrTrunc(Src, DL, MVT::i32),
182+
Args.emplace_back(DAG.getZExtOrTrunc(Op1, DL, MVT::i32),
183+
Type::getInt32Ty(*DAG.getContext()));
184+
break;
185+
}
186+
case RTLIB::MEMCHR: {
187+
UsesResult = true;
188+
NewLC = RTLIB::SC_MEMCHR;
189+
Args.emplace_back(DAG.getZExtOrTrunc(Op1, DL, MVT::i32),
182190
Type::getInt32Ty(*DAG.getContext()));
183191
break;
184192
}
@@ -194,7 +202,9 @@ SDValue AArch64SelectionDAGInfo::EmitStreamingCompatibleMemLibCall(
194202
PointerType *RetTy = PointerType::getUnqual(*DAG.getContext());
195203
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
196204
TLI->getLibcallCallingConv(NewLC), RetTy, Symbol, std::move(Args));
197-
return TLI->LowerCallTo(CLI).second;
205+
206+
auto [Result, ChainOut] = TLI->LowerCallTo(CLI);
207+
return UsesResult ? DAG.getMergeValues({Result, ChainOut}, DL) : ChainOut;
198208
}
199209

200210
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -255,6 +265,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
255265
return SDValue();
256266
}
257267

268+
/// Emit target-specific code for a memchr call.
///
/// When LowerToSMERoutines is set and the current function's SME attributes do
/// not report a non-streaming interface and body, the call is emitted through
/// EmitStreamingCompatibleMemLibCall with RTLIB::MEMCHR, passing \p Src,
/// \p Char and \p Length as the three operands.
///
/// \returns a pair built from values 0 and 1 of the emitted libcall node, or a
/// pair of empty SDValues when the SME routine is not used. \p SrcPtrInfo is
/// not referenced in this body.
std::pair<SDValue, SDValue> AArch64SelectionDAGInfo::EmitTargetCodeForMemchr(
269+
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Src,
270+
SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const {
271+
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
272+
SMEAttrs Attrs = AFI->getSMEFnAttrs();
273+
// Use the SME routine only when that lowering is enabled and the function is
// not one with a non-streaming interface and body.
if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody()) {
274+
SDValue Result = EmitStreamingCompatibleMemLibCall(
275+
DAG, dl, Chain, Src, Char, Length, RTLIB::MEMCHR);
276+
// For MEMCHR the helper returns merged values: value 0 is the call result
// and value 1 is the output chain.
return std::make_pair(Result.getValue(0), Result.getValue(1));
277+
}
278+
// No libcall was emitted here; hand back empty values.
return std::make_pair(SDValue(), SDValue());
279+
}
280+
258281
static const int kSetTagLoopThreshold = 176;
259282

260283
static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,

llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,19 @@ class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo {
5353
MachinePointerInfo DstPtrInfo,
5454
MachinePointerInfo SrcPtrInfo) const override;
5555

56+
/// Override that emits target-specific code for a memchr call over \p Src,
/// \p Char and \p Length. Returns a pair of SDValues; both are empty when no
/// target-specific code is emitted.
std::pair<SDValue, SDValue>
57+
EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
58+
SDValue Src, SDValue Char, SDValue Length,
59+
MachinePointerInfo SrcPtrInfo) const override;
60+
5661
/// Override that emits target-specific code for a set-tag operation.
/// NOTE(review): the definition is not visible in this diff; \p ZeroData
/// presumably requests that the tagged memory also be zeroed -- confirm
/// against the implementation.
SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
5762
SDValue Chain, SDValue Op1, SDValue Op2,
5863
MachinePointerInfo DstPtrInfo,
5964
bool ZeroData) const override;
6065

6166
/// Emit a call to the streaming-compatible (SC_*) counterpart of the memory
/// libcall \p LC.
///
/// This diff renames the former Dst/Src parameters to Op0/Op1. \p Op0 is
/// passed as the first, pointer-typed argument. \p Op1 is passed as a pointer
/// for MEMCPY and MEMMOVE, and is zero-extended or truncated to i32 for
/// MEMSET and MEMCHR.
///
/// \returns the output chain, or, when the call's result is used (MEMCHR),
/// merged values holding the call result followed by the output chain.
SDValue EmitStreamingCompatibleMemLibCall(SelectionDAG &DAG, const SDLoc &DL,
62-
SDValue Chain, SDValue Dst,
63-
SDValue Src, SDValue Size,
67+
SDValue Chain, SDValue Op0,
68+
SDValue Op1, SDValue Size,
6469
RTLIB::Libcall LC) const;
6570
};
6671
} // namespace llvm

llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll

Lines changed: 174 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
3-
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
4-
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-COMMON,CHECK
3+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-COMMON,CHECK-NO-SME-ROUTINES
4+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-COMMON,CHECK-MOPS
55

66
@dst = global [512 x i8] zeroinitializer, align 1
77
@src = global [512 x i8] zeroinitializer, align 1
@@ -153,6 +153,172 @@ entry:
153153
ret void
154154
}
155155

156+
define ptr @se_memchr(ptr %src, i64 %n) "aarch64_pstate_sm_enabled" {
157+
; CHECK-LABEL: se_memchr:
158+
; CHECK: // %bb.0: // %entry
159+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
160+
; CHECK-NEXT: .cfi_def_cfa_offset 16
161+
; CHECK-NEXT: .cfi_offset w30, -16
162+
; CHECK-NEXT: mov x2, x1
163+
; CHECK-NEXT: mov w1, #5 // =0x5
164+
; CHECK-NEXT: bl __arm_sc_memchr
165+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
166+
; CHECK-NEXT: ret
167+
;
168+
; CHECK-NO-SME-ROUTINES-LABEL: se_memchr:
169+
; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
170+
; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
171+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 96
172+
; CHECK-NO-SME-ROUTINES-NEXT: cntd x9
173+
; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
174+
; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
175+
; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
176+
; CHECK-NO-SME-ROUTINES-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
177+
; CHECK-NO-SME-ROUTINES-NEXT: str x9, [sp, #80] // 8-byte Spill
178+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset vg, -16
179+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w30, -24
180+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w29, -32
181+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b8, -40
182+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b9, -48
183+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b10, -56
184+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b11, -64
185+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b12, -72
186+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b13, -80
187+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b14, -88
188+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b15, -96
189+
; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x1
190+
; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
191+
; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #5 // =0x5
192+
; CHECK-NO-SME-ROUTINES-NEXT: bl memchr
193+
; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
194+
; CHECK-NO-SME-ROUTINES-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
195+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
196+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
197+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
198+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
199+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 0
200+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore vg
201+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w30
202+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w29
203+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b8
204+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b9
205+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b10
206+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b11
207+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b12
208+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b13
209+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b14
210+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b15
211+
; CHECK-NO-SME-ROUTINES-NEXT: ret
212+
;
213+
; CHECK-MOPS-LABEL: se_memchr:
214+
; CHECK-MOPS: // %bb.0: // %entry
215+
; CHECK-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
216+
; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 16
217+
; CHECK-MOPS-NEXT: .cfi_offset w30, -16
218+
; CHECK-MOPS-NEXT: mov x2, x1
219+
; CHECK-MOPS-NEXT: mov w1, #5 // =0x5
220+
; CHECK-MOPS-NEXT: bl __arm_sc_memchr
221+
; CHECK-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
222+
; CHECK-MOPS-NEXT: ret
223+
entry:
224+
%res = tail call ptr @memchr(ptr %src, i32 5, i64 %n)
225+
ret ptr %res
226+
}
227+
228+
define ptr @sc_memchr(ptr %src, i64 %n) "aarch64_pstate_sm_compatible" {
229+
; CHECK-LABEL: sc_memchr:
230+
; CHECK: // %bb.0: // %entry
231+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
232+
; CHECK-NEXT: .cfi_def_cfa_offset 16
233+
; CHECK-NEXT: .cfi_offset w30, -16
234+
; CHECK-NEXT: mov x2, x1
235+
; CHECK-NEXT: mov w1, #5 // =0x5
236+
; CHECK-NEXT: bl __arm_sc_memchr
237+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
238+
; CHECK-NEXT: ret
239+
;
240+
; CHECK-NO-SME-ROUTINES-LABEL: sc_memchr:
241+
; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
242+
; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
243+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 96
244+
; CHECK-NO-SME-ROUTINES-NEXT: cntd x9
245+
; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
246+
; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
247+
; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
248+
; CHECK-NO-SME-ROUTINES-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
249+
; CHECK-NO-SME-ROUTINES-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
250+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w19, -8
251+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset vg, -16
252+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w30, -24
253+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w29, -32
254+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b8, -40
255+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b9, -48
256+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b10, -56
257+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b11, -64
258+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b12, -72
259+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b13, -80
260+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b14, -88
261+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b15, -96
262+
; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x1
263+
; CHECK-NO-SME-ROUTINES-NEXT: mrs x19, SVCR
264+
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB4_2
265+
; CHECK-NO-SME-ROUTINES-NEXT: // %bb.1: // %entry
266+
; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
267+
; CHECK-NO-SME-ROUTINES-NEXT: .LBB4_2: // %entry
268+
; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #5 // =0x5
269+
; CHECK-NO-SME-ROUTINES-NEXT: bl memchr
270+
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB4_4
271+
; CHECK-NO-SME-ROUTINES-NEXT: // %bb.3: // %entry
272+
; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
273+
; CHECK-NO-SME-ROUTINES-NEXT: .LBB4_4: // %entry
274+
; CHECK-NO-SME-ROUTINES-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
275+
; CHECK-NO-SME-ROUTINES-NEXT: ldr x19, [sp, #88] // 8-byte Reload
276+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
277+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
278+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
279+
; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
280+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 0
281+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w19
282+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore vg
283+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w30
284+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w29
285+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b8
286+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b9
287+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b10
288+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b11
289+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b12
290+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b13
291+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b14
292+
; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b15
293+
; CHECK-NO-SME-ROUTINES-NEXT: ret
294+
;
295+
; CHECK-MOPS-LABEL: sc_memchr:
296+
; CHECK-MOPS: // %bb.0: // %entry
297+
; CHECK-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
298+
; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 16
299+
; CHECK-MOPS-NEXT: .cfi_offset w30, -16
300+
; CHECK-MOPS-NEXT: mov x2, x1
301+
; CHECK-MOPS-NEXT: mov w1, #5 // =0x5
302+
; CHECK-MOPS-NEXT: bl __arm_sc_memchr
303+
; CHECK-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
304+
; CHECK-MOPS-NEXT: ret
305+
entry:
306+
%res = tail call ptr @memchr(ptr %src, i32 5, i64 %n)
307+
ret ptr %res
308+
}
309+
310+
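; Non-streaming (and not streaming-compatible) call to memchr; every RUN line expects a plain tail call to memchr. Note: the function below is named ns_memcpy but exercises memchr.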
311+
define ptr @ns_memcpy(ptr %src, i64 %n) {
312+
; CHECK-COMMON-LABEL: ns_memcpy:
313+
; CHECK-COMMON: // %bb.0: // %entry
314+
; CHECK-COMMON-NEXT: mov x2, x1
315+
; CHECK-COMMON-NEXT: mov w1, #5 // =0x5
316+
; CHECK-COMMON-NEXT: b memchr
317+
entry:
318+
%res = tail call ptr @memchr(ptr %src, i32 5, i64 %n)
319+
ret ptr %res
320+
}
321+
156322
define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
157323
; CHECK-LABEL: sc_memcpy:
158324
; CHECK: // %bb.0: // %entry
@@ -179,15 +345,15 @@ define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
179345
; CHECK-NO-SME-ROUTINES-NEXT: mrs x19, SVCR
180346
; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
181347
; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
182-
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_2
348+
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB6_2
183349
; CHECK-NO-SME-ROUTINES-NEXT: // %bb.1: // %entry
184350
; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
185-
; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_2: // %entry
351+
; CHECK-NO-SME-ROUTINES-NEXT: .LBB6_2: // %entry
186352
; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy
187-
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_4
353+
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB6_4
188354
; CHECK-NO-SME-ROUTINES-NEXT: // %bb.3: // %entry
189355
; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
190-
; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_4: // %entry
356+
; CHECK-NO-SME-ROUTINES-NEXT: .LBB6_4: // %entry
191357
; CHECK-NO-SME-ROUTINES-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
192358
; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
193359
; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
@@ -283,3 +449,4 @@ entry:
283449
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
284450
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
285451
declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
452+
declare ptr @memchr(ptr, i32, i64)

0 commit comments

Comments
 (0)