//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
  : CallLowering(&TLI) {}

namespace {
struct IncomingArgHandler : public CallLowering::ValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
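    // Track the highest stack byte touched so users of this handler can read
    // back the total size of the incoming argument area afterwards.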
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
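    // Extended (SExt/ZExt/AExt) values arrive in a register holding the wider
    // LocVT, so copy at that width first and then truncate back down to the
    // virtual register's own type.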
    switch (VA.getLocInfo()) {
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  bool isIncomingArgumentHandler() const override { return true; }

  uint64_t StackUsed;
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
    : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB, CCAssignFn *AssignFn)
    : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn,
                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
                     int FPDiff = 0)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
        StackSize(0) {}

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

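    // Tail-call stack arguments overwrite the caller's own incoming argument
    // area, so address them as fixed frame objects shifted by FPDiff rather
    // than as offsets from the current SP.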
    if (IsTailCall) {
      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      Register FIReg = MRI.createGenericVirtualRegister(p0);
      MIRBuilder.buildFrameIndex(FIReg, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg;
    }

    Register SPReg = MRI.createGenericVirtualRegister(p0);
    MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));

    Register OffsetReg = MRI.createGenericVirtualRegister(s64);
    MIRBuilder.buildConstant(OffsetReg, Offset);

    Register AddrReg = MRI.createGenericVirtualRegister(p0);
    MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
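    // An any-extended value occupies the full location size on the stack, so
    // widen the register with G_ANYEXT and store the extended width instead.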
    if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
      Size = VA.getLocVT().getSizeInBits() / 8;
      ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(Size * 8), ValVReg)
                    ->getOperand(0)
                    .getReg();
    }
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOStore, Size, 1);
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
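    // Fixed and variadic arguments may follow different assignment rules under
    // some ABIs (Darwin, for instance, treats anonymous arguments differently),
    // so pick the matching CCAssignFn and record the running stack requirement.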
    bool Res;
    if (Info.IsFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackSize = State.getNextStackOffset();
    return Res;
  }

  MachineInstrBuilder MIB;
  CCAssignFn *AssignFnVarArg;
  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;
  uint64_t StackSize;
};
} // namespace

static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return CallConv == CallingConv::Fast && TailCallOpt;
}

void AArch64CallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  if (SplitVTs.size() == 1) {
    // No splitting to do, but we want to replace the original type (e.g. [1 x
    // double] -> double).
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.Flags[0], OrigArg.IsFixed);
    return;
  }

  // Create one ArgInfo for each virtual register in the original ArgInfo.
  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
      OrigArg.Ty, CallConv, false);
  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
                           OrigArg.IsFixed);
    if (NeedsRegBlock)
      SplitArgs.back().Flags[0].setInConsecutiveRegs();
  }

  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
}

bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
        return false;
      }

      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but can easily be done in future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
                        .getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty");
              return false;
            }
          } else {
            // A scalar extend.
            CurVReg =
                MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
    }

    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
    Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    ArgInfo OrigArg{VRegs[i], Arg.getType()};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Handler.StackUsed;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
    return true;
  default:
    return canGuaranteeTCO(CC);
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
/// CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
                         *CalleeAssignFnVarArg, *CallerAssignFnFixed,
                         *CallerAssignFnVarArg))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());

  if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned i = 0; i < OutLocs.size(); ++i) {
    auto &ArgLoc = OutLocs[i];
    // If it's not a register, it's fine.
    if (!ArgLoc.isRegLoc()) {
      if (Info.IsVarArg) {
        // Be conservative and disallow variadic memory operands to match SDAG's
        // behaviour.
        // FIXME: If the caller's calling convention is C, then we can
        // potentially use its argument area. However, for cases like fastcc,
        // we can't do anything.
        LLVM_DEBUG(
            dbgs()
            << "... Cannot tail call vararg function with stack arguments\n");
        return false;
      }
      continue;
    }

    Register Reg = ArgLoc.getLocReg();

    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;

    LLVM_DEBUG(
        dbgs()
        << "... Call has an argument passed in a callee-saved register.\n");

    // Check if it was copied from.
    ArgInfo &OutInfo = OutArgs[i];

    if (OutInfo.Regs.size() > 1) {
      LLVM_DEBUG(
          dbgs() << "... Cannot handle arguments in multiple registers.\n");
      return false;
    }

    // Check if we copy the register, walking through copies from virtual
    // registers. Note that getDefIgnoringCopies does not ignore copies from
    // physical registers.
    MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
    if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
      LLVM_DEBUG(
          dbgs()
          << "... Parameter was not copied into a VReg, cannot tail call.\n");
      return false;
    }

    // Got a copy. Verify that it's the same as the register we want.
    Register CopyRHS = RegDef->getOperand(1).getReg();
    if (CopyRHS != Reg) {
      LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
                           "VReg, cannot tail call.\n");
      return false;
    }
  }

  return true;
}

bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (MF.getTarget().Options.GuaranteedTailCallOpt)
    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? AArch64::BLR : AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.hasFnAttribute("branch-target-enforcement"))
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (F.hasFnAttribute("branch-target-enforcement")) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
    analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  // Do the actual argument marshalling.
  SmallVector<unsigned, 8> PhysRegs;
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                             AssignFnVarArg, true, FPDiff);
  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
    return false;

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call for
    // the forwarded registers that we *aren't* passing as parameters. This will
    // preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(NumBytes).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
        *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
        0));

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1))
      OutArgs.back().Flags[0].setZExt();
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

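  // Open the call sequence now; its stack-size operands are filled in after
  // argument marshalling, once Handler.StackSize is known.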
  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Do the actual argument marshalling.
  SmallVector<unsigned, 8> PhysRegs;
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                             AssignFnVarArg, false);
  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
    return false;

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
        *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
        0));

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
    if (!handleAssignments(MIRBuilder, InArgs, Handler))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Handler.StackSize, 16)
          : 0;

  CallSeqStart.addImm(Handler.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Handler.StackSize)
      .addImm(CalleePopBytes);

  return true;
}