1//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl   -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of custom routines for the X86
10// Calling Convention that aren't done by tablegen.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86CallingConv.h"
15#include "X86Subtarget.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/CodeGen/CallingConvLower.h"
18#include "llvm/IR/CallingConv.h"
19
20using namespace llvm;
21
22/// When regcall calling convention compiled to 32 bit arch, special treatment
23/// is required for 64 bit masks.
24/// The value should be assigned to two GPRs.
25/// \return true if registers were allocated and false otherwise.
26static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
27                                          MVT &LocVT,
28                                          CCValAssign::LocInfo &LocInfo,
29                                          ISD::ArgFlagsTy &ArgFlags,
30                                          CCState &State) {
31  // List of GPR registers that are available to store values in regcall
32  // calling convention.
33  static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
34                                      X86::ESI};
35
36  // The vector will save all the available registers for allocation.
37  SmallVector<unsigned, 5> AvailableRegs;
38
39  // searching for the available registers.
40  for (auto Reg : RegList) {
41    if (!State.isAllocated(Reg))
42      AvailableRegs.push_back(Reg);
43  }
44
45  const size_t RequiredGprsUponSplit = 2;
46  if (AvailableRegs.size() < RequiredGprsUponSplit)
47    return false; // Not enough free registers - continue the search.
48
49  // Allocating the available registers.
50  for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
51
52    // Marking the register as located.
53    unsigned Reg = State.AllocateReg(AvailableRegs[I]);
54
55    // Since we previously made sure that 2 registers are available
56    // we expect that a real register number will be returned.
57    assert(Reg && "Expecting a register will be available");
58
59    // Assign the value to the allocated register
60    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
61  }
62
63  // Successful in allocating registers - stop scanning next rules.
64  return true;
65}
66
67static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
68  if (ValVT.is512BitVector()) {
69    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
70                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
71    return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
72  }
73
74  if (ValVT.is256BitVector()) {
75    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
76                                           X86::YMM3, X86::YMM4, X86::YMM5};
77    return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
78  }
79
80  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
81                                         X86::XMM3, X86::XMM4, X86::XMM5};
82  return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
83}
84
85static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
86  static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
87  return makeArrayRef(std::begin(RegListGPR), std::end(RegListGPR));
88}
89
90static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
91                                            MVT &LocVT,
92                                            CCValAssign::LocInfo &LocInfo,
93                                            ISD::ArgFlagsTy &ArgFlags,
94                                            CCState &State) {
95
96  ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
97  bool Is64bit = static_cast<const X86Subtarget &>(
98                     State.getMachineFunction().getSubtarget())
99                     .is64Bit();
100
101  for (auto Reg : RegList) {
102    // If the register is not marked as allocated - assign to it.
103    if (!State.isAllocated(Reg)) {
104      unsigned AssigedReg = State.AllocateReg(Reg);
105      assert(AssigedReg == Reg && "Expecting a valid register allocation");
106      State.addLoc(
107          CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo));
108      return true;
109    }
110    // If the register is marked as shadow allocated - assign to it.
111    if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
112      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
113      return true;
114    }
115  }
116
117  llvm_unreachable("Clang should ensure that hva marked vectors will have "
118                   "an available register.");
119  return false;
120}
121
122/// Vectorcall calling convention has special handling for vector types or
123/// HVA for 64 bit arch.
124/// For HVAs shadow registers might be allocated on the first pass
125/// and actual XMM registers are allocated on the second pass.
126/// For vector types, actual XMM registers are allocated on the first pass.
127/// \return true if registers were allocated and false otherwise.
128static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
129                                 CCValAssign::LocInfo &LocInfo,
130                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
131  // On the second pass, go through the HVAs only.
132  if (ArgFlags.isSecArgPass()) {
133    if (ArgFlags.isHva())
134      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
135                                             ArgFlags, State);
136    return true;
137  }
138
139  // Process only vector types as defined by vectorcall spec:
140  // "A vector type is either a floating-point type, for example,
141  //  a float or double, or an SIMD vector type, for example, __m128 or __m256".
142  if (!(ValVT.isFloatingPoint() ||
143        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
144    // If R9 was already assigned it means that we are after the fourth element
145    // and because this is not an HVA / Vector type, we need to allocate
146    // shadow XMM register.
147    if (State.isAllocated(X86::R9)) {
148      // Assign shadow XMM register.
149      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
150    }
151
152    return false;
153  }
154
155  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
156    // Assign shadow GPR register.
157    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());
158
159    // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
160    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
161      // In Vectorcall Calling convention, additional shadow stack can be
162      // created on top of the basic 32 bytes of win64.
163      // It can happen if the fifth or sixth argument is vector type or HVA.
164      // At that case for each argument a shadow stack of 8 bytes is allocated.
165      const TargetRegisterInfo *TRI =
166          State.getMachineFunction().getSubtarget().getRegisterInfo();
167      if (TRI->regsOverlap(Reg, X86::XMM4) ||
168          TRI->regsOverlap(Reg, X86::XMM5))
169        State.AllocateStack(8, Align(8));
170
171      if (!ArgFlags.isHva()) {
172        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
173        return true; // Allocated a register - Stop the search.
174      }
175    }
176  }
177
178  // If this is an HVA - Stop the search,
179  // otherwise continue the search.
180  return ArgFlags.isHva();
181}
182
183/// Vectorcall calling convention has special handling for vector types or
184/// HVA for 32 bit arch.
185/// For HVAs actual XMM registers are allocated on the second pass.
186/// For vector types, actual XMM registers are allocated on the first pass.
187/// \return true if registers were allocated and false otherwise.
188static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
189                                 CCValAssign::LocInfo &LocInfo,
190                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
191  // On the second pass, go through the HVAs only.
192  if (ArgFlags.isSecArgPass()) {
193    if (ArgFlags.isHva())
194      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
195                                             ArgFlags, State);
196    return true;
197  }
198
199  // Process only vector types as defined by vectorcall spec:
200  // "A vector type is either a floating point type, for example,
201  //  a float or double, or an SIMD vector type, for example, __m128 or __m256".
202  if (!(ValVT.isFloatingPoint() ||
203        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
204    return false;
205  }
206
207  if (ArgFlags.isHva())
208    return true; // If this is an HVA - Stop the search.
209
210  // Assign XMM register.
211  if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
212    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
213    return true;
214  }
215
216  // In case we did not find an available XMM register for a vector -
217  // pass it indirectly.
218  // It is similar to CCPassIndirect, with the addition of inreg.
219  if (!ValVT.isFloatingPoint()) {
220    LocVT = MVT::i32;
221    LocInfo = CCValAssign::Indirect;
222    ArgFlags.setInReg();
223  }
224
225  return false; // No register was assigned - Continue the search.
226}
227
228static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
229                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
230                                CCState &) {
231  llvm_unreachable("The AnyReg calling convention is only supported by the "
232                   "stackmap and patchpoint intrinsics.");
233  // gracefully fallback to X86 C calling convention on Release builds.
234  return false;
235}
236
237static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
238                               CCValAssign::LocInfo &LocInfo,
239                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
240  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
241  // not to split i64 and double between a register and stack
242  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
243  static const unsigned NumRegs = sizeof(RegList) / sizeof(RegList[0]);
244
245  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
246
247  // If this is the first part of an double/i64/i128, or if we're already
248  // in the middle of a split, add to the pending list. If this is not
249  // the end of the split, return, otherwise go on to process the pending
250  // list
251  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
252    PendingMembers.push_back(
253        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
254    if (!ArgFlags.isSplitEnd())
255      return true;
256  }
257
258  // If there are no pending members, we are not in the middle of a split,
259  // so do the usual inreg stuff.
260  if (PendingMembers.empty()) {
261    if (unsigned Reg = State.AllocateReg(RegList)) {
262      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
263      return true;
264    }
265    return false;
266  }
267
268  assert(ArgFlags.isSplitEnd());
269
270  // We now have the entire original argument in PendingMembers, so decide
271  // whether to use registers or the stack.
272  // Per the MCU ABI:
273  // a) To use registers, we need to have enough of them free to contain
274  // the entire argument.
275  // b) We never want to use more than 2 registers for a single argument.
276
277  unsigned FirstFree = State.getFirstUnallocated(RegList);
278  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
279
280  for (auto &It : PendingMembers) {
281    if (UseRegs)
282      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
283    else
284      It.convertToMem(State.AllocateStack(4, Align(4)));
285    State.addLoc(It);
286  }
287
288  PendingMembers.clear();
289
290  return true;
291}
292
293/// X86 interrupt handlers can only take one or two stack arguments, but if
294/// there are two arguments, they are in the opposite order from the standard
295/// convention. Therefore, we have to look at the argument count up front before
296/// allocating stack for each argument.
297static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
298                        CCValAssign::LocInfo &LocInfo,
299                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
300  const MachineFunction &MF = State.getMachineFunction();
301  size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
302  bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
303  unsigned SlotSize = Is64Bit ? 8 : 4;
304  unsigned Offset;
305  if (ArgCount == 1 && ValNo == 0) {
306    // If we have one argument, the argument is five stack slots big, at fixed
307    // offset zero.
308    Offset = State.AllocateStack(5 * SlotSize, Align(4));
309  } else if (ArgCount == 2 && ValNo == 0) {
310    // If we have two arguments, the stack slot is *after* the error code
311    // argument. Pretend it doesn't consume stack space, and account for it when
312    // we assign the second argument.
313    Offset = SlotSize;
314  } else if (ArgCount == 2 && ValNo == 1) {
315    // If this is the second of two arguments, it must be the error code. It
316    // appears first on the stack, and is then followed by the five slot
317    // interrupt struct.
318    Offset = 0;
319    (void)State.AllocateStack(6 * SlotSize, Align(4));
320  } else {
321    report_fatal_error("unsupported x86 interrupt prototype");
322  }
323
324  // FIXME: This should be accounted for in
325  // X86FrameLowering::getFrameIndexReference, not here.
326  if (Is64Bit && ArgCount == 2)
327    Offset += SlotSize;
328
329  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
330  return true;
331}
332
333// Provides entry points of CC_X86 and RetCC_X86.
334#include "X86GenCallingConv.inc"
335