//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

namespace {
struct IncomingArgHandler : public CallLowering::ValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     CCAssignFn *AssignFn)
      : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    int FI = MFI.CreateFixedObject(Size, Offset, true);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
    MIRBuilder.buildFrameIndex(AddrReg, FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg;
  }

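  // Values promoted to a wider location type arrive in a physical register
  // of that wider type; copy them out at the location type and truncate back
  // down to the value type. Illustrative MIR for an i8 argument passed in w0:
  //   %copy:_(s32) = COPY $w0
  //   %val:_(s8) = G_TRUNC %copy(s32)
  // Everything else is a plain copy out of the physical register.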
  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
    switch (VA.getLocInfo()) {
    default:
      MIRBuilder.buildCopy(ValVReg, PhysReg);
      break;
    case CCValAssign::LocInfo::SExt:
    case CCValAssign::LocInfo::ZExt:
    case CCValAssign::LocInfo::AExt: {
      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
      MIRBuilder.buildTrunc(ValVReg, Copy);
      break;
    }
    }
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // FIXME: Get alignment
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
        1);
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;

  bool isIncomingArgumentHandler() const override { return true; }

  uint64_t StackUsed;
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

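/// Handler for values leaving the current function: outgoing call arguments
/// and our own return values. Register values become copies into physical
/// registers attached to the call (or return) instruction as implicit uses;
/// stack-based values are stored relative to SP, or into fixed frame objects
/// offset by FPDiff when lowering a tail call that reuses the caller's
/// incoming argument area.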
struct OutgoingArgHandler : public CallLowering::ValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, CCAssignFn *AssignFn,
                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
                     int FPDiff = 0)
      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
        StackSize(0) {}

  bool isIncomingArgumentHandler() const override { return false; }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      Register FIReg = MRI.createGenericVirtualRegister(p0);
      MIRBuilder.buildFrameIndex(FIReg, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg;
    }

    Register SPReg = MRI.createGenericVirtualRegister(p0);
    MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));

    Register OffsetReg = MRI.createGenericVirtualRegister(s64);
    MIRBuilder.buildConstant(OffsetReg, Offset);

    Register AddrReg = MRI.createGenericVirtualRegister(p0);
    MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg;
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
      Size = VA.getLocVT().getSizeInBits() / 8;
      ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(Size * 8), ValVReg)
                    ->getOperand(0)
                    .getReg();
    }
    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
        MPO, MachineMemOperand::MOStore, Size, 1);
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool Res;
    if (Info.IsFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackSize = State.getNextStackOffset();
    return Res;
  }

  MachineInstrBuilder MIB;
  CCAssignFn *AssignFnVarArg;
  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;
  uint64_t StackSize;
};
} // namespace

static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return CallConv == CallingConv::Fast && TailCallOpt;
}

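/// Break OrigArg into one ArgInfo per value type that the calling convention
/// assigns separately, as computed by ComputeValueVTs. For example, an IR
/// argument of type {i64, i64} becomes two s64 pieces, while [1 x double] is
/// simply rewritten as double without splitting.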
void AArch64CallLowering::splitToValueTypes(
    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
    const DataLayout &DL, MachineRegisterInfo &MRI,
    CallingConv::ID CallConv) const {
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  if (OrigArg.Ty->isVoidTy())
    return;

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);

  if (SplitVTs.size() == 1) {
    // No splitting to do, but we want to replace the original type (e.g. [1 x
    // double] -> double).
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.Flags[0], OrigArg.IsFixed);
    return;
  }

  // Create one ArgInfo for each virtual register in the original ArgInfo.
  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
      OrigArg.Ty, CallConv, false);
  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
                           OrigArg.IsFixed);
    if (NeedsRegBlock)
      SplitArgs.back().Flags[0].setInConsecutiveRegs();
  }

  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
}

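/// Lower the return value(s) held in VRegs: widen or pad each piece to the
/// register type demanded by the calling convention, then copy the pieces
/// into the return registers as implicit operands of RET_ReallyLR.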
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which "
                             "need splitting\n");
        return false;
      }

      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but this can easily be done in the future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs()
                             << "Outgoing vector ret has too many elts\n");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
                        .getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // A scalar extend.
            CurVReg =
                MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
    }

    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
    Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    ArgInfo OrigArg{VRegs[i], Arg.getType()};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Handler.StackUsed;
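  // Variadic functions also record where the anonymous arguments begin, so
  // that va_start can later find the first unnamed argument on the stack.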
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
    return true;
  default:
    return canGuaranteeTCO(CC);
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
/// for CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
                         *CalleeAssignFnVarArg, *CallerAssignFnFixed,
                         *CallerAssignFnVarArg))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

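/// Check that the outgoing arguments of a prospective tail call can be
/// lowered safely: they must fit inside the caller's incoming stack argument
/// area, and any argument landing in a callee-saved register must simply
/// forward the same value the caller received in that register.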
bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());

  if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned i = 0; i < OutLocs.size(); ++i) {
    auto &ArgLoc = OutLocs[i];
    // If it's not a register, it's fine.
    if (!ArgLoc.isRegLoc()) {
      if (Info.IsVarArg) {
        // Be conservative and disallow variadic memory operands to match
        // SDAG's behaviour.
        // FIXME: If the caller's calling convention is C, then we can
        // potentially use its argument area. However, for cases like fastcc,
        // we can't do anything.
        LLVM_DEBUG(
            dbgs()
            << "... Cannot tail call vararg function with stack arguments\n");
        return false;
      }
      continue;
    }

    Register Reg = ArgLoc.getLocReg();

    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;

    LLVM_DEBUG(
        dbgs()
        << "... Call has an argument passed in a callee-saved register.\n");

    // Check if it was copied from.
    ArgInfo &OutInfo = OutArgs[i];

    if (OutInfo.Regs.size() > 1) {
      LLVM_DEBUG(
          dbgs() << "... Cannot handle arguments in multiple registers.\n");
      return false;
    }

    // Check if we copy the register, walking through copies from virtual
    // registers. Note that getDefIgnoringCopies does not ignore copies from
    // physical registers.
    MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
    if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
      LLVM_DEBUG(
          dbgs()
          << "... Parameter was not copied into a VReg, cannot tail call.\n");
      return false;
    }

    // Got a copy. Verify that it's the same as the register we want.
    Register CopyRHS = RegDef->getOperand(1).getReg();
    if (CopyRHS != Reg) {
      LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
                           "VReg, cannot tail call.\n");
      return false;
    }
  }

  return true;
}

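/// Decide whether this call can be lowered as a tail call: the call must be
/// marked as a tail-call candidate, the calling conventions and preserved
/// register masks of caller and callee must be compatible, and the outgoing
/// arguments must be safely placeable (see above).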
bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (MF.getTarget().Options.GuaranteedTailCallOpt)
    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? AArch64::BLR : AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.hasFnAttribute("branch-target-enforcement"))
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

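/// Emit a tail call: marshal the outgoing arguments, writing stack-based ones
/// into fixed objects that overlay the caller's own incoming argument area,
/// then terminate the block with a TCRETURN pseudo instead of an ordinary
/// call instruction.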
bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (F.hasFnAttribute("branch-target-enforcement")) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
    analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
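    // For example, a caller with 32 bytes of incoming stack arguments that
    // makes a tail call needing 48 bytes of stack arguments gets
    // FPDiff = 32 - 48 = -16: the argument area must grow by 16 bytes.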
    FPDiff = NumReusableBytes - NumBytes;

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started
    // at a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  // Do the actual argument marshalling.
  SmallVector<unsigned, 8> PhysRegs;
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                             AssignFnVarArg, true, FPDiff);
  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
    return false;

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters.
    // This will preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(NumBytes).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
        *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
        0));

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

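/// Lower a call: prefer the tail-call path when it is legal; otherwise wrap
/// an ordinary call in an ADJCALLSTACKDOWN/ADJCALLSTACKUP pair, marshalling
/// arguments into registers and stack slots and copying any results back out
/// of the return registers.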
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1))
      OutArgs.back().Flags[0].setZExt();
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering.
    // Instead, fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart =
      MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Do the actual argument marshalling.
  SmallVector<unsigned, 8> PhysRegs;
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                             AssignFnVarArg, false);
  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
    return false;

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    MIB->getOperand(0).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
        *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
        0));

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
    if (!handleAssignments(MIRBuilder, InArgs, Handler))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Handler.StackSize, 16)
          : 0;

  CallSeqStart.addImm(Handler.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Handler.StackSize)
      .addImm(CalleePopBytes);

  return true;
}