//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  switch (LocVT.SimpleTy) {
  case MVT::f32: {
    // Allocate stack space as follows:
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Use align=8 for the dummy area so the beginning of these two areas is
    // 8-byte aligned.
    State.AllocateStack(4, Align(8)); // for empty area
    // Use align=4 for the value to place it just after the dummy area.
    unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }
  default:
    return false;
  }
}
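
// Note: LowerVAARG below relies on the same 8-byte slot layout for an f32
// vararg (the value lives in bytes 4-7 of its slot, hence the internal
// offset of 4 it adds to the va_list pointer). Keep the two in sync.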

#include "VEGenCallingConv.inc"
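// The generated include above provides the calling-convention functions used
// throughout this file: CC_VE and CC_VE2 for argument assignment and RetCC_VE
// for return values.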

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = RetCC_VE;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, RetCC_VE);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    assert(!VA.needsCustom() && "Unexpected custom lowering");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

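  // RetOps now holds the chain, one register node per copied return value,
  // and the optional glue; these become the operands of the RET_FLAG node.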
  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE is correct now.
  CCInfo.AnalyzeFormalArguments(Ins, CC_VE);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // Get the high bits for i32 struct elements.
      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
                          DAG.getConstant(32, DL, MVT::i32));

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // CC_VE computes stack offsets relative to the beginning of the
    // arguments area at %fp+176.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the 176 bytes of register save area.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}
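
// For reference, the incoming stack frame assumed above (offsets from %fp):
//   +0   .. +175 : the 176-byte register save area (ArgsBaseOffset)
//   +176 ..      : argument slots assigned by CC_VE, 8-byte aligned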

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16) // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE is correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);

  // VE requires arguments to be passed BOTH in registers and on the stack for
  // varargs functions and unprototyped functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze the operands again if arguments must be stored BOTH ways.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register, so prepare to
  // copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through the PLT.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn the GlobalAddress/ExternalSymbol node into a value node containing
  // its address here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

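  // VE performs calls indirectly through %sx12 (the "outer" register named in
  // getRegisterByName above), so the callee address computed above is passed
  // to the call like any other register argument.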
  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp+176 in the callee frame,
    // %sp+176 in ours.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set the inreg flag manually for codegen-generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    unsigned Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32}', two consecutive i32 values can
    // reside in the high and low bits of the same register. Reuse the
    // previous CopyFromReg node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // Get the high bits for i32 struct elements.
    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
                       DAG.getConstant(32, DL, MVT::i32));

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      MachineMemOperand::Flags,
                                                      bool *Fast) const {
  if (Fast) {
    // Unaligned accesses are always fast on VE.
    *Fast = true;
  }
  return true;
}

bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate kinds for X and Y in ~X & Y: only simm7
  // works for X, and only mimm works for Y.  However, this function is used
  // to check whether an immediate value is OK for either X or Y of an and-not
  // instruction.  Generating an additional instruction just to materialize an
  // immediate is no good, since the purpose of this function is to convert a
  // series of 3 instructions into another series of 3 instructions with
  // better parallelism.  Therefore, we return false for all immediate values
  // for now.
  // FIXME: Change hasAndNot function to have two operands to make it work
  //        correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's OK for generic registers.
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);

  /// Load & Store {
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      // Turn FP extload into load/fpextend.
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);

      // Turn FP truncstore into trunc + store.
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have i1 sign-extending loads.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access memory with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  /// } Stack

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // Use isel patterns for i32 and i64.
    setOperationAction(ISD::BSWAP, IntVT, Legal);
    setOperationAction(ISD::CTLZ, IntVT, Legal);
    setOperationAction(ISD::CTPOP, IntVT, Legal);

    // Use isel patterns for i64; promote i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so expand them in LLVM.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 is not supported.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(RET_FLAG)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
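
// For the absolute VK_VE_HI32/VK_VE_LO32 pair this corresponds to a sequence
// along the lines of (register number illustrative):
//     lea %s35, %lo(sym)
//     and %s35, %s35, (32)0
//     lea.sl %s35, %hi(sym)(%s35)
// The GOT variants in makeAddress below follow the same pattern.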

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a GOT load for every variable!
  if (isPositionIndependent()) {
    // GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
    // function has calls.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    MFI.setHasCalls(true);
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for PIC code with local linkage:
      //     lea %s35, %gotoff_lo(.LCPI0_0)
      //     and %s35, %s35, (32)0
      //     lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
      //     adds.l %s35, %s15, %s35                  ; %s15 is GOT
      // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for PIC code without local linkage:
    //     lea %s35, %got_lo(.LCPI0_0)
    //     and %s35, %s35, (32)0
    //     lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
    //     adds.l %s35, %s15, %s35                  ; %s15 is GOT
    //     ld     %s35, (,%s35)
    // FIXME: use lea.sl %s35, %got_hi(.LCPI0_0)(%s35, %s15)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}
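
// In short: PIC references with local linkage use GOT-relative arithmetic,
// other PIC references load the address from the GOT, and non-PIC code uses
// the absolute HI32/LO32 pair for every code model.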

/// Custom Lower {

SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine ISD will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
                             DAG.getIntPtrConstant(0, dl, true),
                             Chain.getValue(1), dl);
  Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function
  // has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return LowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;

  if (VT == MVT::f32) {
    // An f32 value needs the special handling below (see also allocateFloat
    // above).
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}

SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate the following code:
  //   (void)__ve_grow_stack(size);
  //   ret = GETSTACKTOP;        // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  //  Chain = Result.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  }
}
/// } Custom Lower