Skip to content

Commit 00a648a

Browse files
[X86] Elect to tail call when sret ptr is passed to the callee
We may be able to allow the callee to be tail-called when the caller expects a `sret` pointer argument, as long as this pointer is forwarded to the callee. Fixes: #146303.
1 parent 43ead21 commit 00a648a

File tree

2 files changed

+53
-93
lines changed

2 files changed

+53
-93
lines changed

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2781,6 +2781,38 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
27812781
return Bytes == MFI.getObjectSize(FI);
27822782
}
27832783

2784+
static bool
2785+
mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
2786+
Register CallerSRetReg) {
2787+
const auto &Outs = CLI.Outs;
2788+
const auto &OutVals = CLI.OutVals;
2789+
2790+
// We know the caller has a sret pointer argument (CallerSRetReg). Locate the
2791+
// index of the outgoing call argument that is marked sret, if there is one.
2792+
unsigned Pos = 0;
2793+
for (unsigned E = Outs.size(); Pos != E; ++Pos)
2794+
if (Outs[Pos].Flags.isSRet())
2795+
break;
2796+
// Bail out if the callee does not have any sret argument.
2797+
if (Pos == Outs.size())
2798+
return false;
2799+
2800+
// At this point, either the caller is forwarding its sret argument to the
2801+
// callee, or the callee is being passed a different sret pointer. We now look
2802+
// for a CopyToReg, where the callee sret argument is written into a new vreg
2803+
// (which should later be %rax/%eax, if this is returned).
2804+
SDValue SRetArgVal = OutVals[Pos];
2805+
for (SDNode *User : SRetArgVal->users()) {
2806+
if (User->getOpcode() != ISD::CopyToReg)
2807+
continue;
2808+
Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
2809+
if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
2810+
return true;
2811+
}
2812+
2813+
return false;
2814+
}
2815+
27842816
/// Check whether the call is eligible for tail call optimization. Targets
27852817
/// that want to do tail call optimization should implement this function.
27862818
/// Note that the x86 backend does not check musttail calls for eligibility! The
@@ -2802,6 +2834,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
28022834

28032835
// If -tailcallopt is specified, make fastcc functions tail-callable.
28042836
MachineFunction &MF = DAG.getMachineFunction();
2837+
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
28052838
const Function &CallerF = MF.getFunction();
28062839

28072840
// If the function return type is x86_fp80 and the callee return type is not,
@@ -2838,14 +2871,15 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
28382871
if (RegInfo->hasStackRealignment(MF))
28392872
return false;
28402873

2841-
// Also avoid sibcall optimization if we're an sret return fn and the callee
2842-
// is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2843-
// insufficient.
2844-
if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2874+
// Avoid sibcall optimization if we are an sret return function, unless the
2875+
// call appears to forward our sret pointer as the callee's sret argument. See
2876+
// comment in LowerReturn about why hasStructRetAttr is insufficient.
2877+
if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
28452878
// For a compatible tail call the callee must return our sret pointer. So it
28462879
// needs to be (a) an sret function itself and (b) we pass our sret as its
28472880
// sret. Condition #b is harder to determine.
2848-
return false;
2881+
if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
2882+
return false;
28492883
} else if (IsCalleePopSRet)
28502884
// The callee pops an sret, so we cannot tail-call, as our caller doesn't
28512885
// expect that.
@@ -2967,8 +3001,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
29673001
X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
29683002
MF.getTarget().Options.GuaranteedTailCallOpt);
29693003

2970-
if (unsigned BytesToPop =
2971-
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
3004+
if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
29723005
// If we have bytes to pop, the callee must pop them.
29733006
bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
29743007
if (!CalleePopMatches)

llvm/test/CodeGen/X86/sibcall.ll

Lines changed: 13 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -444,21 +444,11 @@ define dso_local void @t15(ptr noalias sret(%struct.foo) %agg.result) nounwind
444444
;
445445
; X64-LABEL: t15:
446446
; X64: # %bb.0:
447-
; X64-NEXT: pushq %rbx
448-
; X64-NEXT: movq %rdi, %rbx
449-
; X64-NEXT: callq f
450-
; X64-NEXT: movq %rbx, %rax
451-
; X64-NEXT: popq %rbx
452-
; X64-NEXT: retq
447+
; X64-NEXT: jmp f # TAILCALL
453448
;
454449
; X32-LABEL: t15:
455450
; X32: # %bb.0:
456-
; X32-NEXT: pushq %rbx
457-
; X32-NEXT: movq %rdi, %rbx
458-
; X32-NEXT: callq f
459-
; X32-NEXT: movl %ebx, %eax
460-
; X32-NEXT: popq %rbx
461-
; X32-NEXT: retq
451+
; X32-NEXT: jmp f # TAILCALL
462452
tail call fastcc void @f(ptr noalias sret(%struct.foo) %agg.result) nounwind
463453
ret void
464454
}
@@ -607,103 +597,50 @@ declare dso_local fastcc double @foo20(double) nounwind
607597
define fastcc void @t21_sret_to_sret(ptr noalias sret(%struct.foo) %agg.result) nounwind {
608598
; X86-LABEL: t21_sret_to_sret:
609599
; X86: # %bb.0:
610-
; X86-NEXT: pushl %esi
611-
; X86-NEXT: subl $8, %esp
612-
; X86-NEXT: movl %ecx, %esi
613-
; X86-NEXT: calll t21_f_sret
614-
; X86-NEXT: movl %esi, %eax
615-
; X86-NEXT: addl $8, %esp
616-
; X86-NEXT: popl %esi
617-
; X86-NEXT: retl
600+
; X86-NEXT: jmp t21_f_sret # TAILCALL
618601
;
619602
; X64-LABEL: t21_sret_to_sret:
620603
; X64: # %bb.0:
621-
; X64-NEXT: pushq %rbx
622-
; X64-NEXT: movq %rdi, %rbx
623-
; X64-NEXT: callq t21_f_sret
624-
; X64-NEXT: movq %rbx, %rax
625-
; X64-NEXT: popq %rbx
626-
; X64-NEXT: retq
604+
; X64-NEXT: jmp t21_f_sret # TAILCALL
627605
;
628606
; X32-LABEL: t21_sret_to_sret:
629607
; X32: # %bb.0:
630-
; X32-NEXT: pushq %rbx
631-
; X32-NEXT: movq %rdi, %rbx
632-
; X32-NEXT: callq t21_f_sret
633-
; X32-NEXT: movl %ebx, %eax
634-
; X32-NEXT: popq %rbx
635-
; X32-NEXT: retq
608+
; X32-NEXT: jmp t21_f_sret # TAILCALL
636609
tail call fastcc void @t21_f_sret(ptr noalias sret(%struct.foo) %agg.result) nounwind
637610
ret void
638611
}
639612

640613
define fastcc void @t21_sret_to_sret_more_args(ptr noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind {
641614
; X86-LABEL: t21_sret_to_sret_more_args:
642615
; X86: # %bb.0:
643-
; X86-NEXT: pushl %esi
644-
; X86-NEXT: subl $8, %esp
645-
; X86-NEXT: movl %ecx, %esi
646-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
647-
; X86-NEXT: movl %eax, (%esp)
648-
; X86-NEXT: calll f_sret@PLT
649-
; X86-NEXT: movl %esi, %eax
650-
; X86-NEXT: addl $8, %esp
651-
; X86-NEXT: popl %esi
652-
; X86-NEXT: retl
616+
; X86-NEXT: jmp f_sret@PLT # TAILCALL
653617
;
654618
; X64-LABEL: t21_sret_to_sret_more_args:
655619
; X64: # %bb.0:
656-
; X64-NEXT: pushq %rbx
657-
; X64-NEXT: movq %rdi, %rbx
658-
; X64-NEXT: callq f_sret@PLT
659-
; X64-NEXT: movq %rbx, %rax
660-
; X64-NEXT: popq %rbx
661-
; X64-NEXT: retq
620+
; X64-NEXT: jmp f_sret@PLT # TAILCALL
662621
;
663622
; X32-LABEL: t21_sret_to_sret_more_args:
664623
; X32: # %bb.0:
665-
; X32-NEXT: pushq %rbx
666-
; X32-NEXT: movq %rdi, %rbx
667-
; X32-NEXT: callq f_sret@PLT
668-
; X32-NEXT: movl %ebx, %eax
669-
; X32-NEXT: popq %rbx
670-
; X32-NEXT: retq
624+
; X32-NEXT: jmp f_sret@PLT # TAILCALL
671625
tail call fastcc void @f_sret(ptr noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind
672626
ret void
673627
}
674628

675629
define fastcc void @t21_sret_to_sret_second_arg_sret(ptr noalias %agg.result, ptr noalias sret(%struct.foo) %ret) nounwind {
676630
; X86-LABEL: t21_sret_to_sret_second_arg_sret:
677631
; X86: # %bb.0:
678-
; X86-NEXT: pushl %esi
679-
; X86-NEXT: subl $8, %esp
680-
; X86-NEXT: movl %edx, %esi
681632
; X86-NEXT: movl %edx, %ecx
682-
; X86-NEXT: calll t21_f_sret
683-
; X86-NEXT: movl %esi, %eax
684-
; X86-NEXT: addl $8, %esp
685-
; X86-NEXT: popl %esi
686-
; X86-NEXT: retl
633+
; X86-NEXT: jmp t21_f_sret # TAILCALL
687634
;
688635
; X64-LABEL: t21_sret_to_sret_second_arg_sret:
689636
; X64: # %bb.0:
690-
; X64-NEXT: pushq %rbx
691-
; X64-NEXT: movq %rsi, %rbx
692637
; X64-NEXT: movq %rsi, %rdi
693-
; X64-NEXT: callq t21_f_sret
694-
; X64-NEXT: movq %rbx, %rax
695-
; X64-NEXT: popq %rbx
696-
; X64-NEXT: retq
638+
; X64-NEXT: jmp t21_f_sret # TAILCALL
697639
;
698640
; X32-LABEL: t21_sret_to_sret_second_arg_sret:
699641
; X32: # %bb.0:
700-
; X32-NEXT: pushq %rbx
701-
; X32-NEXT: movq %rsi, %rbx
702642
; X32-NEXT: movq %rsi, %rdi
703-
; X32-NEXT: callq t21_f_sret
704-
; X32-NEXT: movl %ebx, %eax
705-
; X32-NEXT: popq %rbx
706-
; X32-NEXT: retq
643+
; X32-NEXT: jmp t21_f_sret # TAILCALL
707644
tail call fastcc void @t21_f_sret(ptr noalias sret(%struct.foo) %ret) nounwind
708645
ret void
709646
}
@@ -725,27 +662,17 @@ define fastcc void @t21_sret_to_sret_more_args2(ptr noalias sret(%struct.foo) %a
725662
;
726663
; X64-LABEL: t21_sret_to_sret_more_args2:
727664
; X64: # %bb.0:
728-
; X64-NEXT: pushq %rbx
729665
; X64-NEXT: movl %esi, %eax
730-
; X64-NEXT: movq %rdi, %rbx
731666
; X64-NEXT: movl %edx, %esi
732667
; X64-NEXT: movl %eax, %edx
733-
; X64-NEXT: callq f_sret@PLT
734-
; X64-NEXT: movq %rbx, %rax
735-
; X64-NEXT: popq %rbx
736-
; X64-NEXT: retq
668+
; X64-NEXT: jmp f_sret@PLT # TAILCALL
737669
;
738670
; X32-LABEL: t21_sret_to_sret_more_args2:
739671
; X32: # %bb.0:
740-
; X32-NEXT: pushq %rbx
741672
; X32-NEXT: movl %esi, %eax
742-
; X32-NEXT: movq %rdi, %rbx
743673
; X32-NEXT: movl %edx, %esi
744674
; X32-NEXT: movl %eax, %edx
745-
; X32-NEXT: callq f_sret@PLT
746-
; X32-NEXT: movl %ebx, %eax
747-
; X32-NEXT: popq %rbx
748-
; X32-NEXT: retq
675+
; X32-NEXT: jmp f_sret@PLT # TAILCALL
749676
tail call fastcc void @f_sret(ptr noalias sret(%struct.foo) %agg.result, i32 %b, i32 %a) nounwind
750677
ret void
751678
}

0 commit comments

Comments
 (0)