1//===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
16#include "AArch64CallingConvention.h"
17#include "AArch64RegisterInfo.h"
18#include "AArch64Subtarget.h"
19#include "MCTargetDesc/AArch64AddressingModes.h"
20#include "Utils/AArch64BaseInfo.h"
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/APInt.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/CodeGen/CallingConvLower.h"
27#include "llvm/CodeGen/FastISel.h"
28#include "llvm/CodeGen/FunctionLoweringInfo.h"
29#include "llvm/CodeGen/ISDOpcodes.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineMemOperand.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/RuntimeLibcalls.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/IR/Argument.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/CallingConv.h"
43#include "llvm/IR/Constant.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DataLayout.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GetElementPtrTypeIterator.h"
49#include "llvm/IR/GlobalValue.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
52#include "llvm/IR/Instructions.h"
53#include "llvm/IR/IntrinsicInst.h"
54#include "llvm/IR/Intrinsics.h"
55#include "llvm/IR/Operator.h"
56#include "llvm/IR/Type.h"
57#include "llvm/IR/User.h"
58#include "llvm/IR/Value.h"
59#include "llvm/MC/MCInstrDesc.h"
60#include "llvm/MC/MCRegisterInfo.h"
61#include "llvm/MC/MCSymbol.h"
62#include "llvm/Support/AtomicOrdering.h"
63#include "llvm/Support/Casting.h"
64#include "llvm/Support/CodeGen.h"
65#include "llvm/Support/Compiler.h"
66#include "llvm/Support/ErrorHandling.h"
67#include "llvm/Support/MachineValueType.h"
68#include "llvm/Support/MathExtras.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <iterator>
73#include <utility>
74
75using namespace llvm;
76
77namespace {
78
79class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82    using BaseKind = enum {
83      RegBase,
84      FrameIndexBase
85    };
86
87  private:
88    BaseKind Kind = RegBase;
89    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90    union {
91      unsigned Reg;
92      int FI;
93    } Base;
94    unsigned OffsetReg = 0;
95    unsigned Shift = 0;
96    int64_t Offset = 0;
97    const GlobalValue *GV = nullptr;
98
99  public:
100    Address() { Base.Reg = 0; }
101
102    void setKind(BaseKind K) { Kind = K; }
103    BaseKind getKind() const { return Kind; }
104    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106    bool isRegBase() const { return Kind == RegBase; }
107    bool isFIBase() const { return Kind == FrameIndexBase; }
108
109    void setReg(unsigned Reg) {
110      assert(isRegBase() && "Invalid base register access!");
111      Base.Reg = Reg;
112    }
113
114    unsigned getReg() const {
115      assert(isRegBase() && "Invalid base register access!");
116      return Base.Reg;
117    }
118
119    void setOffsetReg(unsigned Reg) {
120      OffsetReg = Reg;
121    }
122
123    unsigned getOffsetReg() const {
124      return OffsetReg;
125    }
126
127    void setFI(unsigned FI) {
128      assert(isFIBase() && "Invalid base frame index  access!");
129      Base.FI = FI;
130    }
131
132    unsigned getFI() const {
133      assert(isFIBase() && "Invalid base frame index access!");
134      return Base.FI;
135    }
136
137    void setOffset(int64_t O) { Offset = O; }
138    int64_t getOffset() { return Offset; }
139    void setShift(unsigned S) { Shift = S; }
140    unsigned getShift() { return Shift; }
141
142    void setGlobalValue(const GlobalValue *G) { GV = G; }
143    const GlobalValue *getGlobalValue() { return GV; }
144  };
145
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
149  LLVMContext *Context;
150
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154
155private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189                            MachineMemOperand::Flags Flags,
190                            unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193                          unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195                         const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202                      const Value *RHS, bool SetFlags = false,
203                      bool WantResult = true,  bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206                         bool SetFlags = false, bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209                         bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212                         AArch64_AM::ShiftExtendType ShiftType,
213                         uint64_t ShiftImm, bool SetFlags = false,
214                         bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217                          AArch64_AM::ShiftExtendType ExtType,
218                          uint64_t ShiftImm, bool SetFlags = false,
219                         bool WantResult = true);
220
221  // Emit functions.
222  bool emitCompareAndBranch(const BranchInst *BI);
223  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228                    MachineMemOperand *MMO = nullptr);
229  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230                 MachineMemOperand *MMO = nullptr);
231  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232                        MachineMemOperand *MMO = nullptr);
233  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236                   bool SetFlags = false, bool WantResult = true,
237                   bool IsZExt = false);
238  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240                   bool SetFlags = false, bool WantResult = true,
241                   bool IsZExt = false);
242  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245                       unsigned RHSReg, bool RHSIsKill,
246                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247                       bool WantResult = true);
248  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249                         const Value *RHS);
250  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251                            bool LHSIsKill, uint64_t Imm);
252  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254                            uint64_t ShiftImm);
255  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257                      unsigned Op1, bool Op1IsKill);
258  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259                        unsigned Op1, bool Op1IsKill);
260  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261                        unsigned Op1, bool Op1IsKill);
262  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263                      unsigned Op1Reg, bool Op1IsKill);
264  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265                      uint64_t Imm, bool IsZExt = true);
266  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267                      unsigned Op1Reg, bool Op1IsKill);
268  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269                      uint64_t Imm, bool IsZExt = true);
270  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271                      unsigned Op1Reg, bool Op1IsKill);
272  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273                      uint64_t Imm, bool IsZExt = false);
274
275  unsigned materializeInt(const ConstantInt *CI, MVT VT);
276  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277  unsigned materializeGV(const GlobalValue *GV);
278
279  // Call handling routines.
280private:
281  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283                       unsigned &NumBytes);
284  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285
286public:
287  // Backend specific FastISel code.
288  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289  unsigned fastMaterializeConstant(const Constant *C) override;
290  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291
292  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293                           const TargetLibraryInfo *LibInfo)
294      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295    Subtarget =
296        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297    Context = &FuncInfo.Fn->getContext();
298  }
299
300  bool fastSelectInstruction(const Instruction *I) override;
301
302#include "AArch64GenFastISel.inc"
303};
304
305} // end anonymous namespace
306
307/// Check if the sign-/zero-extend will be a noop.
308static bool isIntExtFree(const Instruction *I) {
309  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310         "Unexpected integer extend instruction.");
311  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312         "Unexpected value type.");
313  bool IsZExt = isa<ZExtInst>(I);
314
315  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316    if (LI->hasOneUse())
317      return true;
318
319  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321      return true;
322
323  return false;
324}
325
326/// Determine the implicit scale factor that is applied by a memory
327/// operation for a given value type.
328static unsigned getImplicitScaleFactor(MVT VT) {
329  switch (VT.SimpleTy) {
330  default:
331    return 0;    // invalid
332  case MVT::i1:  // fall-through
333  case MVT::i8:
334    return 1;
335  case MVT::i16:
336    return 2;
337  case MVT::i32: // fall-through
338  case MVT::f32:
339    return 4;
340  case MVT::i64: // fall-through
341  case MVT::f64:
342    return 8;
343  }
344}
345
346CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347  if (CC == CallingConv::WebKit_JS)
348    return CC_AArch64_WebKit_JS;
349  if (CC == CallingConv::GHC)
350    return CC_AArch64_GHC;
351  if (CC == CallingConv::CFGuard_Check)
352    return CC_AArch64_Win64_CFGuard_Check;
353  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
354}
355
356unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
357  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
358         "Alloca should always return a pointer.");
359
360  // Don't handle dynamic allocas.
361  if (!FuncInfo.StaticAllocaMap.count(AI))
362    return 0;
363
364  DenseMap<const AllocaInst *, int>::iterator SI =
365      FuncInfo.StaticAllocaMap.find(AI);
366
367  if (SI != FuncInfo.StaticAllocaMap.end()) {
368    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
369    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
370            ResultReg)
371        .addFrameIndex(SI->second)
372        .addImm(0)
373        .addImm(0);
374    return ResultReg;
375  }
376
377  return 0;
378}
379
380unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
381  if (VT > MVT::i64)
382    return 0;
383
384  if (!CI->isZero())
385    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
386
387  // Create a copy from the zero register to materialize a "0" value.
388  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
389                                                   : &AArch64::GPR32RegClass;
390  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
391  unsigned ResultReg = createResultReg(RC);
392  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
393          ResultReg).addReg(ZeroReg, getKillRegState(true));
394  return ResultReg;
395}
396
397unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
398  // Positive zero (+0.0) has to be materialized with a fmov from the zero
399  // register, because the immediate version of fmov cannot encode zero.
400  if (CFP->isNullValue())
401    return fastMaterializeFloatZero(CFP);
402
403  if (VT != MVT::f32 && VT != MVT::f64)
404    return 0;
405
406  const APFloat Val = CFP->getValueAPF();
407  bool Is64Bit = (VT == MVT::f64);
408  // This checks to see if we can use FMOV instructions to materialize
409  // a constant, otherwise we have to materialize via the constant pool.
410  int Imm =
411      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
412  if (Imm != -1) {
413    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
414    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
415  }
416
417  // For the MachO large code model materialize the FP constant in code.
418  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
419    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
420    const TargetRegisterClass *RC = Is64Bit ?
421        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
422
423    unsigned TmpReg = createResultReg(RC);
424    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
425        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
426
427    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
428    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
429            TII.get(TargetOpcode::COPY), ResultReg)
430        .addReg(TmpReg, getKillRegState(true));
431
432    return ResultReg;
433  }
434
435  // Materialize via constant pool.  MachineConstantPool wants an explicit
436  // alignment.
437  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
438  if (Align == 0)
439    Align = DL.getTypeAllocSize(CFP->getType());
440
441  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
442  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
444          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
445
446  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
447  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
448  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
449      .addReg(ADRPReg)
450      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
451  return ResultReg;
452}
453
454unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
455  // We can't handle thread-local variables quickly yet.
456  if (GV->isThreadLocal())
457    return 0;
458
459  // MachO still uses GOT for large code-model accesses, but ELF requires
460  // movz/movk sequences, which FastISel doesn't handle yet.
461  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
462    return 0;
463
464  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
465
466  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
467  if (!DestEVT.isSimple())
468    return 0;
469
470  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
471  unsigned ResultReg;
472
473  if (OpFlags & AArch64II::MO_GOT) {
474    // ADRP + LDRX
475    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
476            ADRPReg)
477        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
478
479    unsigned LdrOpc;
480    if (Subtarget->isTargetILP32()) {
481      ResultReg = createResultReg(&AArch64::GPR32RegClass);
482      LdrOpc = AArch64::LDRWui;
483    } else {
484      ResultReg = createResultReg(&AArch64::GPR64RegClass);
485      LdrOpc = AArch64::LDRXui;
486    }
487    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
488            ResultReg)
489      .addReg(ADRPReg)
490      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
491                        AArch64II::MO_NC | OpFlags);
492    if (!Subtarget->isTargetILP32())
493      return ResultReg;
494
495    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
496    // so we must extend the result on ILP32.
497    unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
498    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
499            TII.get(TargetOpcode::SUBREG_TO_REG))
500        .addDef(Result64)
501        .addImm(0)
502        .addReg(ResultReg, RegState::Kill)
503        .addImm(AArch64::sub_32);
504    return Result64;
505  } else {
506    // ADRP + ADDX
507    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
508            ADRPReg)
509        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
510
511    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
512    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
513            ResultReg)
514        .addReg(ADRPReg)
515        .addGlobalAddress(GV, 0,
516                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
517        .addImm(0);
518  }
519  return ResultReg;
520}
521
522unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
523  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
524
525  // Only handle simple types.
526  if (!CEVT.isSimple())
527    return 0;
528  MVT VT = CEVT.getSimpleVT();
529  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
530  // 'null' pointers need to have a somewhat special treatment.
531  if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) {
532    (void)CPN;
533    assert(CPN->getType()->getPointerAddressSpace() == 0 &&
534           "Unexpected address space");
535    assert(VT == MVT::i64 && "Expected 64-bit pointers");
536    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
537  }
538
539  if (const auto *CI = dyn_cast<ConstantInt>(C))
540    return materializeInt(CI, VT);
541  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
542    return materializeFP(CFP, VT);
543  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
544    return materializeGV(GV);
545
546  return 0;
547}
548
549unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
550  assert(CFP->isNullValue() &&
551         "Floating-point constant is not a positive zero.");
552  MVT VT;
553  if (!isTypeLegal(CFP->getType(), VT))
554    return 0;
555
556  if (VT != MVT::f32 && VT != MVT::f64)
557    return 0;
558
559  bool Is64Bit = (VT == MVT::f64);
560  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
561  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
562  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
563}
564
565/// Check if the multiply is by a power-of-2 constant.
566static bool isMulPowOf2(const Value *I) {
567  if (const auto *MI = dyn_cast<MulOperator>(I)) {
568    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
569      if (C->getValue().isPowerOf2())
570        return true;
571    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
572      if (C->getValue().isPowerOf2())
573        return true;
574  }
575  return false;
576}
577
578// Computes the address to get to an object.
579bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
580{
581  const User *U = nullptr;
582  unsigned Opcode = Instruction::UserOp1;
583  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
584    // Don't walk into other basic blocks unless the object is an alloca from
585    // another block, otherwise it may not have a virtual register assigned.
586    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
587        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
588      Opcode = I->getOpcode();
589      U = I;
590    }
591  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
592    Opcode = C->getOpcode();
593    U = C;
594  }
595
596  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
597    if (Ty->getAddressSpace() > 255)
598      // Fast instruction selection doesn't support the special
599      // address spaces.
600      return false;
601
602  switch (Opcode) {
603  default:
604    break;
605  case Instruction::BitCast:
606    // Look through bitcasts.
607    return computeAddress(U->getOperand(0), Addr, Ty);
608
609  case Instruction::IntToPtr:
610    // Look past no-op inttoptrs.
611    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
612        TLI.getPointerTy(DL))
613      return computeAddress(U->getOperand(0), Addr, Ty);
614    break;
615
616  case Instruction::PtrToInt:
617    // Look past no-op ptrtoints.
618    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
619      return computeAddress(U->getOperand(0), Addr, Ty);
620    break;
621
622  case Instruction::GetElementPtr: {
623    Address SavedAddr = Addr;
624    uint64_t TmpOffset = Addr.getOffset();
625
626    // Iterate through the GEP folding the constants into offsets where
627    // we can.
628    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
629         GTI != E; ++GTI) {
630      const Value *Op = GTI.getOperand();
631      if (StructType *STy = GTI.getStructTypeOrNull()) {
632        const StructLayout *SL = DL.getStructLayout(STy);
633        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
634        TmpOffset += SL->getElementOffset(Idx);
635      } else {
636        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
637        while (true) {
638          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
639            // Constant-offset addressing.
640            TmpOffset += CI->getSExtValue() * S;
641            break;
642          }
643          if (canFoldAddIntoGEP(U, Op)) {
644            // A compatible add with a constant operand. Fold the constant.
645            ConstantInt *CI =
646                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
647            TmpOffset += CI->getSExtValue() * S;
648            // Iterate on the other operand.
649            Op = cast<AddOperator>(Op)->getOperand(0);
650            continue;
651          }
652          // Unsupported
653          goto unsupported_gep;
654        }
655      }
656    }
657
658    // Try to grab the base operand now.
659    Addr.setOffset(TmpOffset);
660    if (computeAddress(U->getOperand(0), Addr, Ty))
661      return true;
662
663    // We failed, restore everything and try the other options.
664    Addr = SavedAddr;
665
666  unsupported_gep:
667    break;
668  }
669  case Instruction::Alloca: {
670    const AllocaInst *AI = cast<AllocaInst>(Obj);
671    DenseMap<const AllocaInst *, int>::iterator SI =
672        FuncInfo.StaticAllocaMap.find(AI);
673    if (SI != FuncInfo.StaticAllocaMap.end()) {
674      Addr.setKind(Address::FrameIndexBase);
675      Addr.setFI(SI->second);
676      return true;
677    }
678    break;
679  }
680  case Instruction::Add: {
681    // Adds of constants are common and easy enough.
682    const Value *LHS = U->getOperand(0);
683    const Value *RHS = U->getOperand(1);
684
685    if (isa<ConstantInt>(LHS))
686      std::swap(LHS, RHS);
687
688    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
689      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
690      return computeAddress(LHS, Addr, Ty);
691    }
692
693    Address Backup = Addr;
694    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
695      return true;
696    Addr = Backup;
697
698    break;
699  }
700  case Instruction::Sub: {
701    // Subs of constants are common and easy enough.
702    const Value *LHS = U->getOperand(0);
703    const Value *RHS = U->getOperand(1);
704
705    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
706      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
707      return computeAddress(LHS, Addr, Ty);
708    }
709    break;
710  }
711  case Instruction::Shl: {
712    if (Addr.getOffsetReg())
713      break;
714
715    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
716    if (!CI)
717      break;
718
719    unsigned Val = CI->getZExtValue();
720    if (Val < 1 || Val > 3)
721      break;
722
723    uint64_t NumBytes = 0;
724    if (Ty && Ty->isSized()) {
725      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
726      NumBytes = NumBits / 8;
727      if (!isPowerOf2_64(NumBits))
728        NumBytes = 0;
729    }
730
731    if (NumBytes != (1ULL << Val))
732      break;
733
734    Addr.setShift(Val);
735    Addr.setExtendType(AArch64_AM::LSL);
736
737    const Value *Src = U->getOperand(0);
738    if (const auto *I = dyn_cast<Instruction>(Src)) {
739      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
740        // Fold the zext or sext when it won't become a noop.
741        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
742          if (!isIntExtFree(ZE) &&
743              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
744            Addr.setExtendType(AArch64_AM::UXTW);
745            Src = ZE->getOperand(0);
746          }
747        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
748          if (!isIntExtFree(SE) &&
749              SE->getOperand(0)->getType()->isIntegerTy(32)) {
750            Addr.setExtendType(AArch64_AM::SXTW);
751            Src = SE->getOperand(0);
752          }
753        }
754      }
755    }
756
757    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
758      if (AI->getOpcode() == Instruction::And) {
759        const Value *LHS = AI->getOperand(0);
760        const Value *RHS = AI->getOperand(1);
761
762        if (const auto *C = dyn_cast<ConstantInt>(LHS))
763          if (C->getValue() == 0xffffffff)
764            std::swap(LHS, RHS);
765
766        if (const auto *C = dyn_cast<ConstantInt>(RHS))
767          if (C->getValue() == 0xffffffff) {
768            Addr.setExtendType(AArch64_AM::UXTW);
769            unsigned Reg = getRegForValue(LHS);
770            if (!Reg)
771              return false;
772            bool RegIsKill = hasTrivialKill(LHS);
773            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
774                                             AArch64::sub_32);
775            Addr.setOffsetReg(Reg);
776            return true;
777          }
778      }
779
780    unsigned Reg = getRegForValue(Src);
781    if (!Reg)
782      return false;
783    Addr.setOffsetReg(Reg);
784    return true;
785  }
786  case Instruction::Mul: {
787    if (Addr.getOffsetReg())
788      break;
789
790    if (!isMulPowOf2(U))
791      break;
792
793    const Value *LHS = U->getOperand(0);
794    const Value *RHS = U->getOperand(1);
795
796    // Canonicalize power-of-2 value to the RHS.
797    if (const auto *C = dyn_cast<ConstantInt>(LHS))
798      if (C->getValue().isPowerOf2())
799        std::swap(LHS, RHS);
800
801    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
802    const auto *C = cast<ConstantInt>(RHS);
803    unsigned Val = C->getValue().logBase2();
804    if (Val < 1 || Val > 3)
805      break;
806
807    uint64_t NumBytes = 0;
808    if (Ty && Ty->isSized()) {
809      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
810      NumBytes = NumBits / 8;
811      if (!isPowerOf2_64(NumBits))
812        NumBytes = 0;
813    }
814
815    if (NumBytes != (1ULL << Val))
816      break;
817
818    Addr.setShift(Val);
819    Addr.setExtendType(AArch64_AM::LSL);
820
821    const Value *Src = LHS;
822    if (const auto *I = dyn_cast<Instruction>(Src)) {
823      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
824        // Fold the zext or sext when it won't become a noop.
825        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
826          if (!isIntExtFree(ZE) &&
827              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
828            Addr.setExtendType(AArch64_AM::UXTW);
829            Src = ZE->getOperand(0);
830          }
831        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
832          if (!isIntExtFree(SE) &&
833              SE->getOperand(0)->getType()->isIntegerTy(32)) {
834            Addr.setExtendType(AArch64_AM::SXTW);
835            Src = SE->getOperand(0);
836          }
837        }
838      }
839    }
840
841    unsigned Reg = getRegForValue(Src);
842    if (!Reg)
843      return false;
844    Addr.setOffsetReg(Reg);
845    return true;
846  }
847  case Instruction::And: {
848    if (Addr.getOffsetReg())
849      break;
850
851    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
852      break;
853
854    const Value *LHS = U->getOperand(0);
855    const Value *RHS = U->getOperand(1);
856
857    if (const auto *C = dyn_cast<ConstantInt>(LHS))
858      if (C->getValue() == 0xffffffff)
859        std::swap(LHS, RHS);
860
861    if (const auto *C = dyn_cast<ConstantInt>(RHS))
862      if (C->getValue() == 0xffffffff) {
863        Addr.setShift(0);
864        Addr.setExtendType(AArch64_AM::LSL);
865        Addr.setExtendType(AArch64_AM::UXTW);
866
867        unsigned Reg = getRegForValue(LHS);
868        if (!Reg)
869          return false;
870        bool RegIsKill = hasTrivialKill(LHS);
871        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
872                                         AArch64::sub_32);
873        Addr.setOffsetReg(Reg);
874        return true;
875      }
876    break;
877  }
878  case Instruction::SExt:
879  case Instruction::ZExt: {
880    if (!Addr.getReg() || Addr.getOffsetReg())
881      break;
882
883    const Value *Src = nullptr;
884    // Fold the zext or sext when it won't become a noop.
885    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
886      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
887        Addr.setExtendType(AArch64_AM::UXTW);
888        Src = ZE->getOperand(0);
889      }
890    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
891      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
892        Addr.setExtendType(AArch64_AM::SXTW);
893        Src = SE->getOperand(0);
894      }
895    }
896
897    if (!Src)
898      break;
899
900    Addr.setShift(0);
901    unsigned Reg = getRegForValue(Src);
902    if (!Reg)
903      return false;
904    Addr.setOffsetReg(Reg);
905    return true;
906  }
907  } // end switch
908
909  if (Addr.isRegBase() && !Addr.getReg()) {
910    unsigned Reg = getRegForValue(Obj);
911    if (!Reg)
912      return false;
913    Addr.setReg(Reg);
914    return true;
915  }
916
917  if (!Addr.getOffsetReg()) {
918    unsigned Reg = getRegForValue(Obj);
919    if (!Reg)
920      return false;
921    Addr.setOffsetReg(Reg);
922    return true;
923  }
924
925  return false;
926}
927
928bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
929  const User *U = nullptr;
930  unsigned Opcode = Instruction::UserOp1;
931  bool InMBB = true;
932
933  if (const auto *I = dyn_cast<Instruction>(V)) {
934    Opcode = I->getOpcode();
935    U = I;
936    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
937  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
938    Opcode = C->getOpcode();
939    U = C;
940  }
941
942  switch (Opcode) {
943  default: break;
944  case Instruction::BitCast:
945    // Look past bitcasts if its operand is in the same BB.
946    if (InMBB)
947      return computeCallAddress(U->getOperand(0), Addr);
948    break;
949  case Instruction::IntToPtr:
950    // Look past no-op inttoptrs if its operand is in the same BB.
951    if (InMBB &&
952        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
953            TLI.getPointerTy(DL))
954      return computeCallAddress(U->getOperand(0), Addr);
955    break;
956  case Instruction::PtrToInt:
957    // Look past no-op ptrtoints if its operand is in the same BB.
958    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
959      return computeCallAddress(U->getOperand(0), Addr);
960    break;
961  }
962
963  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
964    Addr.setGlobalValue(GV);
965    return true;
966  }
967
968  // If all else fails, try to materialize the value in a register.
969  if (!Addr.getGlobalValue()) {
970    Addr.setReg(getRegForValue(V));
971    return Addr.getReg() != 0;
972  }
973
974  return false;
975}
976
977bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
978  EVT evt = TLI.getValueType(DL, Ty, true);
979
980  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
981    return false;
982
983  // Only handle simple types.
984  if (evt == MVT::Other || !evt.isSimple())
985    return false;
986  VT = evt.getSimpleVT();
987
988  // This is a legal type, but it's not something we handle in fast-isel.
989  if (VT == MVT::f128)
990    return false;
991
992  // Handle all other legal types, i.e. a register that will directly hold this
993  // value.
994  return TLI.isTypeLegal(VT);
995}
996
997/// Determine if the value type is supported by FastISel.
998///
999/// FastISel for AArch64 can handle more value types than are legal. This adds
1000/// simple value type such as i1, i8, and i16.
1001bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1002  if (Ty->isVectorTy() && !IsVectorAllowed)
1003    return false;
1004
1005  if (isTypeLegal(Ty, VT))
1006    return true;
1007
1008  // If this is a type than can be sign or zero-extended to a basic operation
1009  // go ahead and accept it now.
1010  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1011    return true;
1012
1013  return false;
1014}
1015
1016bool AArch64FastISel::isValueAvailable(const Value *V) const {
1017  if (!isa<Instruction>(V))
1018    return true;
1019
1020  const auto *I = cast<Instruction>(V);
1021  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1022}
1023
1024bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1025  if (Subtarget->isTargetILP32())
1026    return false;
1027
1028  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1029  if (!ScaleFactor)
1030    return false;
1031
1032  bool ImmediateOffsetNeedsLowering = false;
1033  bool RegisterOffsetNeedsLowering = false;
1034  int64_t Offset = Addr.getOffset();
1035  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1036    ImmediateOffsetNeedsLowering = true;
1037  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1038           !isUInt<12>(Offset / ScaleFactor))
1039    ImmediateOffsetNeedsLowering = true;
1040
1041  // Cannot encode an offset register and an immediate offset in the same
1042  // instruction. Fold the immediate offset into the load/store instruction and
1043  // emit an additional add to take care of the offset register.
1044  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1045    RegisterOffsetNeedsLowering = true;
1046
1047  // Cannot encode zero register as base.
1048  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1049    RegisterOffsetNeedsLowering = true;
1050
1051  // If this is a stack pointer and the offset needs to be simplified then put
1052  // the alloca address into a register, set the base type back to register and
1053  // continue. This should almost never happen.
1054  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1055  {
1056    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1057    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1058            ResultReg)
1059      .addFrameIndex(Addr.getFI())
1060      .addImm(0)
1061      .addImm(0);
1062    Addr.setKind(Address::RegBase);
1063    Addr.setReg(ResultReg);
1064  }
1065
1066  if (RegisterOffsetNeedsLowering) {
1067    unsigned ResultReg = 0;
1068    if (Addr.getReg()) {
1069      if (Addr.getExtendType() == AArch64_AM::SXTW ||
1070          Addr.getExtendType() == AArch64_AM::UXTW   )
1071        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1072                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1073                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
1074                                  Addr.getShift());
1075      else
1076        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1077                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1078                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1079                                  Addr.getShift());
1080    } else {
1081      if (Addr.getExtendType() == AArch64_AM::UXTW)
1082        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1083                               /*Op0IsKill=*/false, Addr.getShift(),
1084                               /*IsZExt=*/true);
1085      else if (Addr.getExtendType() == AArch64_AM::SXTW)
1086        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1087                               /*Op0IsKill=*/false, Addr.getShift(),
1088                               /*IsZExt=*/false);
1089      else
1090        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1091                               /*Op0IsKill=*/false, Addr.getShift());
1092    }
1093    if (!ResultReg)
1094      return false;
1095
1096    Addr.setReg(ResultReg);
1097    Addr.setOffsetReg(0);
1098    Addr.setShift(0);
1099    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1100  }
1101
1102  // Since the offset is too large for the load/store instruction get the
1103  // reg+offset into a register.
1104  if (ImmediateOffsetNeedsLowering) {
1105    unsigned ResultReg;
1106    if (Addr.getReg())
1107      // Try to fold the immediate into the add instruction.
1108      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1109    else
1110      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1111
1112    if (!ResultReg)
1113      return false;
1114    Addr.setReg(ResultReg);
1115    Addr.setOffset(0);
1116  }
1117  return true;
1118}
1119
1120void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1121                                           const MachineInstrBuilder &MIB,
1122                                           MachineMemOperand::Flags Flags,
1123                                           unsigned ScaleFactor,
1124                                           MachineMemOperand *MMO) {
1125  int64_t Offset = Addr.getOffset() / ScaleFactor;
1126  // Frame base works a bit differently. Handle it separately.
1127  if (Addr.isFIBase()) {
1128    int FI = Addr.getFI();
1129    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1130    // and alignment should be based on the VT.
1131    MMO = FuncInfo.MF->getMachineMemOperand(
1132        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1133        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1134    // Now add the rest of the operands.
1135    MIB.addFrameIndex(FI).addImm(Offset);
1136  } else {
1137    assert(Addr.isRegBase() && "Unexpected address kind.");
1138    const MCInstrDesc &II = MIB->getDesc();
1139    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1140    Addr.setReg(
1141      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1142    Addr.setOffsetReg(
1143      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1144    if (Addr.getOffsetReg()) {
1145      assert(Addr.getOffset() == 0 && "Unexpected offset");
1146      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1147                      Addr.getExtendType() == AArch64_AM::SXTX;
1148      MIB.addReg(Addr.getReg());
1149      MIB.addReg(Addr.getOffsetReg());
1150      MIB.addImm(IsSigned);
1151      MIB.addImm(Addr.getShift() != 0);
1152    } else
1153      MIB.addReg(Addr.getReg()).addImm(Offset);
1154  }
1155
1156  if (MMO)
1157    MIB.addMemOperand(MMO);
1158}
1159
1160unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1161                                     const Value *RHS, bool SetFlags,
1162                                     bool WantResult,  bool IsZExt) {
1163  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1164  bool NeedExtend = false;
1165  switch (RetVT.SimpleTy) {
1166  default:
1167    return 0;
1168  case MVT::i1:
1169    NeedExtend = true;
1170    break;
1171  case MVT::i8:
1172    NeedExtend = true;
1173    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1174    break;
1175  case MVT::i16:
1176    NeedExtend = true;
1177    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1178    break;
1179  case MVT::i32:  // fall-through
1180  case MVT::i64:
1181    break;
1182  }
1183  MVT SrcVT = RetVT;
1184  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1185
1186  // Canonicalize immediates to the RHS first.
1187  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1188    std::swap(LHS, RHS);
1189
1190  // Canonicalize mul by power of 2 to the RHS.
1191  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1192    if (isMulPowOf2(LHS))
1193      std::swap(LHS, RHS);
1194
1195  // Canonicalize shift immediate to the RHS.
1196  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1198      if (isa<ConstantInt>(SI->getOperand(1)))
1199        if (SI->getOpcode() == Instruction::Shl  ||
1200            SI->getOpcode() == Instruction::LShr ||
1201            SI->getOpcode() == Instruction::AShr   )
1202          std::swap(LHS, RHS);
1203
1204  unsigned LHSReg = getRegForValue(LHS);
1205  if (!LHSReg)
1206    return 0;
1207  bool LHSIsKill = hasTrivialKill(LHS);
1208
1209  if (NeedExtend)
1210    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1211
1212  unsigned ResultReg = 0;
1213  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1214    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1215    if (C->isNegative())
1216      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1217                                SetFlags, WantResult);
1218    else
1219      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1220                                WantResult);
1221  } else if (const auto *C = dyn_cast<Constant>(RHS))
1222    if (C->isNullValue())
1223      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1224                                WantResult);
1225
1226  if (ResultReg)
1227    return ResultReg;
1228
1229  // Only extend the RHS within the instruction if there is a valid extend type.
1230  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1231      isValueAvailable(RHS)) {
1232    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1233      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1234        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1235          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1236          if (!RHSReg)
1237            return 0;
1238          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1239          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1240                               RHSIsKill, ExtendType, C->getZExtValue(),
1241                               SetFlags, WantResult);
1242        }
1243    unsigned RHSReg = getRegForValue(RHS);
1244    if (!RHSReg)
1245      return 0;
1246    bool RHSIsKill = hasTrivialKill(RHS);
1247    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1248                         ExtendType, 0, SetFlags, WantResult);
1249  }
1250
1251  // Check if the mul can be folded into the instruction.
1252  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1253    if (isMulPowOf2(RHS)) {
1254      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1255      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1256
1257      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1258        if (C->getValue().isPowerOf2())
1259          std::swap(MulLHS, MulRHS);
1260
1261      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1262      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1263      unsigned RHSReg = getRegForValue(MulLHS);
1264      if (!RHSReg)
1265        return 0;
1266      bool RHSIsKill = hasTrivialKill(MulLHS);
1267      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1268                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1269                                WantResult);
1270      if (ResultReg)
1271        return ResultReg;
1272    }
1273  }
1274
1275  // Check if the shift can be folded into the instruction.
1276  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1277    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1278      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1279        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1280        switch (SI->getOpcode()) {
1281        default: break;
1282        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1283        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1284        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1285        }
1286        uint64_t ShiftVal = C->getZExtValue();
1287        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1288          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1289          if (!RHSReg)
1290            return 0;
1291          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1292          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1293                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
1294                                    WantResult);
1295          if (ResultReg)
1296            return ResultReg;
1297        }
1298      }
1299    }
1300  }
1301
1302  unsigned RHSReg = getRegForValue(RHS);
1303  if (!RHSReg)
1304    return 0;
1305  bool RHSIsKill = hasTrivialKill(RHS);
1306
1307  if (NeedExtend)
1308    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1309
1310  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1311                       SetFlags, WantResult);
1312}
1313
1314unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1315                                        bool LHSIsKill, unsigned RHSReg,
1316                                        bool RHSIsKill, bool SetFlags,
1317                                        bool WantResult) {
1318  assert(LHSReg && RHSReg && "Invalid register number.");
1319
1320  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1321      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1322    return 0;
1323
1324  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325    return 0;
1326
1327  static const unsigned OpcTable[2][2][2] = {
1328    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1329      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1330    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1331      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1332  };
1333  bool Is64Bit = RetVT == MVT::i64;
1334  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1335  const TargetRegisterClass *RC =
1336      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1337  unsigned ResultReg;
1338  if (WantResult)
1339    ResultReg = createResultReg(RC);
1340  else
1341    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1342
1343  const MCInstrDesc &II = TII.get(Opc);
1344  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1345  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1346  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1347      .addReg(LHSReg, getKillRegState(LHSIsKill))
1348      .addReg(RHSReg, getKillRegState(RHSIsKill));
1349  return ResultReg;
1350}
1351
1352unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1353                                        bool LHSIsKill, uint64_t Imm,
1354                                        bool SetFlags, bool WantResult) {
1355  assert(LHSReg && "Invalid register number.");
1356
1357  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1358    return 0;
1359
1360  unsigned ShiftImm;
1361  if (isUInt<12>(Imm))
1362    ShiftImm = 0;
1363  else if ((Imm & 0xfff000) == Imm) {
1364    ShiftImm = 12;
1365    Imm >>= 12;
1366  } else
1367    return 0;
1368
1369  static const unsigned OpcTable[2][2][2] = {
1370    { { AArch64::SUBWri,  AArch64::SUBXri  },
1371      { AArch64::ADDWri,  AArch64::ADDXri  }  },
1372    { { AArch64::SUBSWri, AArch64::SUBSXri },
1373      { AArch64::ADDSWri, AArch64::ADDSXri }  }
1374  };
1375  bool Is64Bit = RetVT == MVT::i64;
1376  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1377  const TargetRegisterClass *RC;
1378  if (SetFlags)
1379    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1380  else
1381    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1382  unsigned ResultReg;
1383  if (WantResult)
1384    ResultReg = createResultReg(RC);
1385  else
1386    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1387
1388  const MCInstrDesc &II = TII.get(Opc);
1389  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1390  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1391      .addReg(LHSReg, getKillRegState(LHSIsKill))
1392      .addImm(Imm)
1393      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1394  return ResultReg;
1395}
1396
1397unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1398                                        bool LHSIsKill, unsigned RHSReg,
1399                                        bool RHSIsKill,
1400                                        AArch64_AM::ShiftExtendType ShiftType,
1401                                        uint64_t ShiftImm, bool SetFlags,
1402                                        bool WantResult) {
1403  assert(LHSReg && RHSReg && "Invalid register number.");
1404  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1405         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1406
1407  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1408    return 0;
1409
1410  // Don't deal with undefined shifts.
1411  if (ShiftImm >= RetVT.getSizeInBits())
1412    return 0;
1413
1414  static const unsigned OpcTable[2][2][2] = {
1415    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1416      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1417    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1418      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1419  };
1420  bool Is64Bit = RetVT == MVT::i64;
1421  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1422  const TargetRegisterClass *RC =
1423      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1424  unsigned ResultReg;
1425  if (WantResult)
1426    ResultReg = createResultReg(RC);
1427  else
1428    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1429
1430  const MCInstrDesc &II = TII.get(Opc);
1431  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1432  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1433  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1434      .addReg(LHSReg, getKillRegState(LHSIsKill))
1435      .addReg(RHSReg, getKillRegState(RHSIsKill))
1436      .addImm(getShifterImm(ShiftType, ShiftImm));
1437  return ResultReg;
1438}
1439
1440unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1441                                        bool LHSIsKill, unsigned RHSReg,
1442                                        bool RHSIsKill,
1443                                        AArch64_AM::ShiftExtendType ExtType,
1444                                        uint64_t ShiftImm, bool SetFlags,
1445                                        bool WantResult) {
1446  assert(LHSReg && RHSReg && "Invalid register number.");
1447  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1448         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1449
1450  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1451    return 0;
1452
1453  if (ShiftImm >= 4)
1454    return 0;
1455
1456  static const unsigned OpcTable[2][2][2] = {
1457    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1458      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1459    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1460      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1461  };
1462  bool Is64Bit = RetVT == MVT::i64;
1463  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1464  const TargetRegisterClass *RC = nullptr;
1465  if (SetFlags)
1466    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1467  else
1468    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1469  unsigned ResultReg;
1470  if (WantResult)
1471    ResultReg = createResultReg(RC);
1472  else
1473    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1474
1475  const MCInstrDesc &II = TII.get(Opc);
1476  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1477  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1479      .addReg(LHSReg, getKillRegState(LHSIsKill))
1480      .addReg(RHSReg, getKillRegState(RHSIsKill))
1481      .addImm(getArithExtendImm(ExtType, ShiftImm));
1482  return ResultReg;
1483}
1484
1485bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1486  Type *Ty = LHS->getType();
1487  EVT EVT = TLI.getValueType(DL, Ty, true);
1488  if (!EVT.isSimple())
1489    return false;
1490  MVT VT = EVT.getSimpleVT();
1491
1492  switch (VT.SimpleTy) {
1493  default:
1494    return false;
1495  case MVT::i1:
1496  case MVT::i8:
1497  case MVT::i16:
1498  case MVT::i32:
1499  case MVT::i64:
1500    return emitICmp(VT, LHS, RHS, IsZExt);
1501  case MVT::f32:
1502  case MVT::f64:
1503    return emitFCmp(VT, LHS, RHS);
1504  }
1505}
1506
1507bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1508                               bool IsZExt) {
1509  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1510                 IsZExt) != 0;
1511}
1512
1513bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1514                                  uint64_t Imm) {
1515  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1516                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
1517}
1518
1519bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1520  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1521    return false;
1522
1523  // Check to see if the 2nd operand is a constant that we can encode directly
1524  // in the compare.
1525  bool UseImm = false;
1526  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1527    if (CFP->isZero() && !CFP->isNegative())
1528      UseImm = true;
1529
1530  unsigned LHSReg = getRegForValue(LHS);
1531  if (!LHSReg)
1532    return false;
1533  bool LHSIsKill = hasTrivialKill(LHS);
1534
1535  if (UseImm) {
1536    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1537    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1538        .addReg(LHSReg, getKillRegState(LHSIsKill));
1539    return true;
1540  }
1541
1542  unsigned RHSReg = getRegForValue(RHS);
1543  if (!RHSReg)
1544    return false;
1545  bool RHSIsKill = hasTrivialKill(RHS);
1546
1547  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1548  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1549      .addReg(LHSReg, getKillRegState(LHSIsKill))
1550      .addReg(RHSReg, getKillRegState(RHSIsKill));
1551  return true;
1552}
1553
1554unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1555                                  bool SetFlags, bool WantResult, bool IsZExt) {
1556  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1557                    IsZExt);
1558}
1559
1560/// This method is a wrapper to simplify add emission.
1561///
1562/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1563/// that fails, then try to materialize the immediate into a register and use
1564/// emitAddSub_rr instead.
1565unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1566                                      int64_t Imm) {
1567  unsigned ResultReg;
1568  if (Imm < 0)
1569    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1570  else
1571    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1572
1573  if (ResultReg)
1574    return ResultReg;
1575
1576  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1577  if (!CReg)
1578    return 0;
1579
1580  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1581  return ResultReg;
1582}
1583
1584unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1585                                  bool SetFlags, bool WantResult, bool IsZExt) {
1586  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1587                    IsZExt);
1588}
1589
1590unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1591                                      bool LHSIsKill, unsigned RHSReg,
1592                                      bool RHSIsKill, bool WantResult) {
1593  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1594                       RHSIsKill, /*SetFlags=*/true, WantResult);
1595}
1596
1597unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1598                                      bool LHSIsKill, unsigned RHSReg,
1599                                      bool RHSIsKill,
1600                                      AArch64_AM::ShiftExtendType ShiftType,
1601                                      uint64_t ShiftImm, bool WantResult) {
1602  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1603                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1604                       WantResult);
1605}
1606
1607unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1608                                        const Value *LHS, const Value *RHS) {
1609  // Canonicalize immediates to the RHS first.
1610  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1611    std::swap(LHS, RHS);
1612
1613  // Canonicalize mul by power-of-2 to the RHS.
1614  if (LHS->hasOneUse() && isValueAvailable(LHS))
1615    if (isMulPowOf2(LHS))
1616      std::swap(LHS, RHS);
1617
1618  // Canonicalize shift immediate to the RHS.
1619  if (LHS->hasOneUse() && isValueAvailable(LHS))
1620    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1621      if (isa<ConstantInt>(SI->getOperand(1)))
1622        std::swap(LHS, RHS);
1623
1624  unsigned LHSReg = getRegForValue(LHS);
1625  if (!LHSReg)
1626    return 0;
1627  bool LHSIsKill = hasTrivialKill(LHS);
1628
1629  unsigned ResultReg = 0;
1630  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1631    uint64_t Imm = C->getZExtValue();
1632    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1633  }
1634  if (ResultReg)
1635    return ResultReg;
1636
1637  // Check if the mul can be folded into the instruction.
1638  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1639    if (isMulPowOf2(RHS)) {
1640      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1641      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1642
1643      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1644        if (C->getValue().isPowerOf2())
1645          std::swap(MulLHS, MulRHS);
1646
1647      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1648      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1649
1650      unsigned RHSReg = getRegForValue(MulLHS);
1651      if (!RHSReg)
1652        return 0;
1653      bool RHSIsKill = hasTrivialKill(MulLHS);
1654      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1655                                   RHSIsKill, ShiftVal);
1656      if (ResultReg)
1657        return ResultReg;
1658    }
1659  }
1660
1661  // Check if the shift can be folded into the instruction.
1662  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1663    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1664      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1665        uint64_t ShiftVal = C->getZExtValue();
1666        unsigned RHSReg = getRegForValue(SI->getOperand(0));
1667        if (!RHSReg)
1668          return 0;
1669        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1670        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1671                                     RHSIsKill, ShiftVal);
1672        if (ResultReg)
1673          return ResultReg;
1674      }
1675  }
1676
1677  unsigned RHSReg = getRegForValue(RHS);
1678  if (!RHSReg)
1679    return 0;
1680  bool RHSIsKill = hasTrivialKill(RHS);
1681
1682  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1683  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1684  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1685    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1686    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1687  }
1688  return ResultReg;
1689}
1690
1691unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1692                                           unsigned LHSReg, bool LHSIsKill,
1693                                           uint64_t Imm) {
1694  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1695                "ISD nodes are not consecutive!");
1696  static const unsigned OpcTable[3][2] = {
1697    { AArch64::ANDWri, AArch64::ANDXri },
1698    { AArch64::ORRWri, AArch64::ORRXri },
1699    { AArch64::EORWri, AArch64::EORXri }
1700  };
1701  const TargetRegisterClass *RC;
1702  unsigned Opc;
1703  unsigned RegSize;
1704  switch (RetVT.SimpleTy) {
1705  default:
1706    return 0;
1707  case MVT::i1:
1708  case MVT::i8:
1709  case MVT::i16:
1710  case MVT::i32: {
1711    unsigned Idx = ISDOpc - ISD::AND;
1712    Opc = OpcTable[Idx][0];
1713    RC = &AArch64::GPR32spRegClass;
1714    RegSize = 32;
1715    break;
1716  }
1717  case MVT::i64:
1718    Opc = OpcTable[ISDOpc - ISD::AND][1];
1719    RC = &AArch64::GPR64spRegClass;
1720    RegSize = 64;
1721    break;
1722  }
1723
1724  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1725    return 0;
1726
1727  unsigned ResultReg =
1728      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1729                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1730  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1731    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1732    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1733  }
1734  return ResultReg;
1735}
1736
1737unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1738                                           unsigned LHSReg, bool LHSIsKill,
1739                                           unsigned RHSReg, bool RHSIsKill,
1740                                           uint64_t ShiftImm) {
1741  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1742                "ISD nodes are not consecutive!");
1743  static const unsigned OpcTable[3][2] = {
1744    { AArch64::ANDWrs, AArch64::ANDXrs },
1745    { AArch64::ORRWrs, AArch64::ORRXrs },
1746    { AArch64::EORWrs, AArch64::EORXrs }
1747  };
1748
1749  // Don't deal with undefined shifts.
1750  if (ShiftImm >= RetVT.getSizeInBits())
1751    return 0;
1752
1753  const TargetRegisterClass *RC;
1754  unsigned Opc;
1755  switch (RetVT.SimpleTy) {
1756  default:
1757    return 0;
1758  case MVT::i1:
1759  case MVT::i8:
1760  case MVT::i16:
1761  case MVT::i32:
1762    Opc = OpcTable[ISDOpc - ISD::AND][0];
1763    RC = &AArch64::GPR32RegClass;
1764    break;
1765  case MVT::i64:
1766    Opc = OpcTable[ISDOpc - ISD::AND][1];
1767    RC = &AArch64::GPR64RegClass;
1768    break;
1769  }
1770  unsigned ResultReg =
1771      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1772                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1773  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1774    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1775    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1776  }
1777  return ResultReg;
1778}
1779
1780unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1781                                     uint64_t Imm) {
1782  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1783}
1784
1785unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1786                                   bool WantZExt, MachineMemOperand *MMO) {
1787  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1788    return 0;
1789
1790  // Simplify this down to something we can handle.
1791  if (!simplifyAddress(Addr, VT))
1792    return 0;
1793
1794  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1795  if (!ScaleFactor)
1796    llvm_unreachable("Unexpected value type.");
1797
1798  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1799  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1800  bool UseScaled = true;
1801  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1802    UseScaled = false;
1803    ScaleFactor = 1;
1804  }
1805
1806  static const unsigned GPOpcTable[2][8][4] = {
1807    // Sign-extend.
1808    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1809        AArch64::LDURXi  },
1810      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1811        AArch64::LDURXi  },
1812      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1813        AArch64::LDRXui  },
1814      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1815        AArch64::LDRXui  },
1816      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1817        AArch64::LDRXroX },
1818      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1819        AArch64::LDRXroX },
1820      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1821        AArch64::LDRXroW },
1822      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1823        AArch64::LDRXroW }
1824    },
1825    // Zero-extend.
1826    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1827        AArch64::LDURXi  },
1828      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1829        AArch64::LDURXi  },
1830      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1831        AArch64::LDRXui  },
1832      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1833        AArch64::LDRXui  },
1834      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1835        AArch64::LDRXroX },
1836      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1837        AArch64::LDRXroX },
1838      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1839        AArch64::LDRXroW },
1840      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1841        AArch64::LDRXroW }
1842    }
1843  };
1844
1845  static const unsigned FPOpcTable[4][2] = {
1846    { AArch64::LDURSi,  AArch64::LDURDi  },
1847    { AArch64::LDRSui,  AArch64::LDRDui  },
1848    { AArch64::LDRSroX, AArch64::LDRDroX },
1849    { AArch64::LDRSroW, AArch64::LDRDroW }
1850  };
1851
1852  unsigned Opc;
1853  const TargetRegisterClass *RC;
1854  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1855                      Addr.getOffsetReg();
1856  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1857  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1858      Addr.getExtendType() == AArch64_AM::SXTW)
1859    Idx++;
1860
1861  bool IsRet64Bit = RetVT == MVT::i64;
1862  switch (VT.SimpleTy) {
1863  default:
1864    llvm_unreachable("Unexpected value type.");
1865  case MVT::i1: // Intentional fall-through.
1866  case MVT::i8:
1867    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1868    RC = (IsRet64Bit && !WantZExt) ?
1869             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1870    break;
1871  case MVT::i16:
1872    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1873    RC = (IsRet64Bit && !WantZExt) ?
1874             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1875    break;
1876  case MVT::i32:
1877    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1878    RC = (IsRet64Bit && !WantZExt) ?
1879             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1880    break;
1881  case MVT::i64:
1882    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1883    RC = &AArch64::GPR64RegClass;
1884    break;
1885  case MVT::f32:
1886    Opc = FPOpcTable[Idx][0];
1887    RC = &AArch64::FPR32RegClass;
1888    break;
1889  case MVT::f64:
1890    Opc = FPOpcTable[Idx][1];
1891    RC = &AArch64::FPR64RegClass;
1892    break;
1893  }
1894
1895  // Create the base instruction, then add the operands.
1896  unsigned ResultReg = createResultReg(RC);
1897  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1898                                    TII.get(Opc), ResultReg);
1899  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1900
1901  // Loading an i1 requires special handling.
1902  if (VT == MVT::i1) {
1903    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1904    assert(ANDReg && "Unexpected AND instruction emission failure.");
1905    ResultReg = ANDReg;
1906  }
1907
1908  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1909  // the 32bit reg to a 64bit reg.
1910  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1911    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1912    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1913            TII.get(AArch64::SUBREG_TO_REG), Reg64)
1914        .addImm(0)
1915        .addReg(ResultReg, getKillRegState(true))
1916        .addImm(AArch64::sub_32);
1917    ResultReg = Reg64;
1918  }
1919  return ResultReg;
1920}
1921
1922bool AArch64FastISel::selectAddSub(const Instruction *I) {
1923  MVT VT;
1924  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1925    return false;
1926
1927  if (VT.isVector())
1928    return selectOperator(I, I->getOpcode());
1929
1930  unsigned ResultReg;
1931  switch (I->getOpcode()) {
1932  default:
1933    llvm_unreachable("Unexpected instruction.");
1934  case Instruction::Add:
1935    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1936    break;
1937  case Instruction::Sub:
1938    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1939    break;
1940  }
1941  if (!ResultReg)
1942    return false;
1943
1944  updateValueMap(I, ResultReg);
1945  return true;
1946}
1947
1948bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1949  MVT VT;
1950  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1951    return false;
1952
1953  if (VT.isVector())
1954    return selectOperator(I, I->getOpcode());
1955
1956  unsigned ResultReg;
1957  switch (I->getOpcode()) {
1958  default:
1959    llvm_unreachable("Unexpected instruction.");
1960  case Instruction::And:
1961    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1962    break;
1963  case Instruction::Or:
1964    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1965    break;
1966  case Instruction::Xor:
1967    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1968    break;
1969  }
1970  if (!ResultReg)
1971    return false;
1972
1973  updateValueMap(I, ResultReg);
1974  return true;
1975}
1976
1977bool AArch64FastISel::selectLoad(const Instruction *I) {
1978  MVT VT;
1979  // Verify we have a legal type before going any further.  Currently, we handle
1980  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1981  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1982  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1983      cast<LoadInst>(I)->isAtomic())
1984    return false;
1985
1986  const Value *SV = I->getOperand(0);
1987  if (TLI.supportSwiftError()) {
1988    // Swifterror values can come from either a function parameter with
1989    // swifterror attribute or an alloca with swifterror attribute.
1990    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1991      if (Arg->hasSwiftErrorAttr())
1992        return false;
1993    }
1994
1995    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1996      if (Alloca->isSwiftError())
1997        return false;
1998    }
1999  }
2000
2001  // See if we can handle this address.
2002  Address Addr;
2003  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
2004    return false;
2005
2006  // Fold the following sign-/zero-extend into the load instruction.
2007  bool WantZExt = true;
2008  MVT RetVT = VT;
2009  const Value *IntExtVal = nullptr;
2010  if (I->hasOneUse()) {
2011    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
2012      if (isTypeSupported(ZE->getType(), RetVT))
2013        IntExtVal = ZE;
2014      else
2015        RetVT = VT;
2016    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
2017      if (isTypeSupported(SE->getType(), RetVT))
2018        IntExtVal = SE;
2019      else
2020        RetVT = VT;
2021      WantZExt = false;
2022    }
2023  }
2024
2025  unsigned ResultReg =
2026      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
2027  if (!ResultReg)
2028    return false;
2029
2030  // There are a few different cases we have to handle, because the load or the
2031  // sign-/zero-extend might not be selected by FastISel if we fall-back to
2032  // SelectionDAG. There is also an ordering issue when both instructions are in
2033  // different basic blocks.
2034  // 1.) The load instruction is selected by FastISel, but the integer extend
2035  //     not. This usually happens when the integer extend is in a different
2036  //     basic block and SelectionDAG took over for that basic block.
2037  // 2.) The load instruction is selected before the integer extend. This only
2038  //     happens when the integer extend is in a different basic block.
2039  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2040  //     by FastISel. This happens if there are instructions between the load
2041  //     and the integer extend that couldn't be selected by FastISel.
2042  if (IntExtVal) {
2043    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2044    // could select it. Emit a copy to subreg if necessary. FastISel will remove
2045    // it when it selects the integer extend.
2046    unsigned Reg = lookUpRegForValue(IntExtVal);
2047    auto *MI = MRI.getUniqueVRegDef(Reg);
2048    if (!MI) {
2049      if (RetVT == MVT::i64 && VT <= MVT::i32) {
2050        if (WantZExt) {
2051          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2052          MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2053          ResultReg = std::prev(I)->getOperand(0).getReg();
2054          removeDeadCode(I, std::next(I));
2055        } else
2056          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2057                                                 /*IsKill=*/true,
2058                                                 AArch64::sub_32);
2059      }
2060      updateValueMap(I, ResultReg);
2061      return true;
2062    }
2063
2064    // The integer extend has already been emitted - delete all the instructions
2065    // that have been emitted by the integer extend lowering code and use the
2066    // result from the load instruction directly.
2067    while (MI) {
2068      Reg = 0;
2069      for (auto &Opnd : MI->uses()) {
2070        if (Opnd.isReg()) {
2071          Reg = Opnd.getReg();
2072          break;
2073        }
2074      }
2075      MachineBasicBlock::iterator I(MI);
2076      removeDeadCode(I, std::next(I));
2077      MI = nullptr;
2078      if (Reg)
2079        MI = MRI.getUniqueVRegDef(Reg);
2080    }
2081    updateValueMap(IntExtVal, ResultReg);
2082    return true;
2083  }
2084
2085  updateValueMap(I, ResultReg);
2086  return true;
2087}
2088
2089bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2090                                       unsigned AddrReg,
2091                                       MachineMemOperand *MMO) {
2092  unsigned Opc;
2093  switch (VT.SimpleTy) {
2094  default: return false;
2095  case MVT::i8:  Opc = AArch64::STLRB; break;
2096  case MVT::i16: Opc = AArch64::STLRH; break;
2097  case MVT::i32: Opc = AArch64::STLRW; break;
2098  case MVT::i64: Opc = AArch64::STLRX; break;
2099  }
2100
2101  const MCInstrDesc &II = TII.get(Opc);
2102  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2103  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2104  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2105      .addReg(SrcReg)
2106      .addReg(AddrReg)
2107      .addMemOperand(MMO);
2108  return true;
2109}
2110
2111bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2112                                MachineMemOperand *MMO) {
2113  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2114    return false;
2115
2116  // Simplify this down to something we can handle.
2117  if (!simplifyAddress(Addr, VT))
2118    return false;
2119
2120  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2121  if (!ScaleFactor)
2122    llvm_unreachable("Unexpected value type.");
2123
2124  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2125  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2126  bool UseScaled = true;
2127  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2128    UseScaled = false;
2129    ScaleFactor = 1;
2130  }
2131
2132  static const unsigned OpcTable[4][6] = {
2133    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2134      AArch64::STURSi,   AArch64::STURDi },
2135    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2136      AArch64::STRSui,   AArch64::STRDui },
2137    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2138      AArch64::STRSroX,  AArch64::STRDroX },
2139    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2140      AArch64::STRSroW,  AArch64::STRDroW }
2141  };
2142
2143  unsigned Opc;
2144  bool VTIsi1 = false;
2145  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2146                      Addr.getOffsetReg();
2147  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2148  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2149      Addr.getExtendType() == AArch64_AM::SXTW)
2150    Idx++;
2151
2152  switch (VT.SimpleTy) {
2153  default: llvm_unreachable("Unexpected value type.");
2154  case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2155  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2156  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2157  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2158  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2159  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2160  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2161  }
2162
2163  // Storing an i1 requires special handling.
2164  if (VTIsi1 && SrcReg != AArch64::WZR) {
2165    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2166    assert(ANDReg && "Unexpected AND instruction emission failure.");
2167    SrcReg = ANDReg;
2168  }
2169  // Create the base instruction, then add the operands.
2170  const MCInstrDesc &II = TII.get(Opc);
2171  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2172  MachineInstrBuilder MIB =
2173      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2174  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2175
2176  return true;
2177}
2178
2179bool AArch64FastISel::selectStore(const Instruction *I) {
2180  MVT VT;
2181  const Value *Op0 = I->getOperand(0);
2182  // Verify we have a legal type before going any further.  Currently, we handle
2183  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2184  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2185  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2186    return false;
2187
2188  const Value *PtrV = I->getOperand(1);
2189  if (TLI.supportSwiftError()) {
2190    // Swifterror values can come from either a function parameter with
2191    // swifterror attribute or an alloca with swifterror attribute.
2192    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2193      if (Arg->hasSwiftErrorAttr())
2194        return false;
2195    }
2196
2197    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2198      if (Alloca->isSwiftError())
2199        return false;
2200    }
2201  }
2202
2203  // Get the value to be stored into a register. Use the zero register directly
2204  // when possible to avoid an unnecessary copy and a wasted register.
2205  unsigned SrcReg = 0;
2206  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2207    if (CI->isZero())
2208      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2209  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2210    if (CF->isZero() && !CF->isNegative()) {
2211      VT = MVT::getIntegerVT(VT.getSizeInBits());
2212      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2213    }
2214  }
2215
2216  if (!SrcReg)
2217    SrcReg = getRegForValue(Op0);
2218
2219  if (!SrcReg)
2220    return false;
2221
2222  auto *SI = cast<StoreInst>(I);
2223
2224  // Try to emit a STLR for seq_cst/release.
2225  if (SI->isAtomic()) {
2226    AtomicOrdering Ord = SI->getOrdering();
2227    // The non-atomic instructions are sufficient for relaxed stores.
2228    if (isReleaseOrStronger(Ord)) {
2229      // The STLR addressing mode only supports a base reg; pass that directly.
2230      unsigned AddrReg = getRegForValue(PtrV);
2231      return emitStoreRelease(VT, SrcReg, AddrReg,
2232                              createMachineMemOperandFor(I));
2233    }
2234  }
2235
2236  // See if we can handle this address.
2237  Address Addr;
2238  if (!computeAddress(PtrV, Addr, Op0->getType()))
2239    return false;
2240
2241  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2242    return false;
2243  return true;
2244}
2245
2246static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2247  switch (Pred) {
2248  case CmpInst::FCMP_ONE:
2249  case CmpInst::FCMP_UEQ:
2250  default:
2251    // AL is our "false" for now. The other two need more compares.
2252    return AArch64CC::AL;
2253  case CmpInst::ICMP_EQ:
2254  case CmpInst::FCMP_OEQ:
2255    return AArch64CC::EQ;
2256  case CmpInst::ICMP_SGT:
2257  case CmpInst::FCMP_OGT:
2258    return AArch64CC::GT;
2259  case CmpInst::ICMP_SGE:
2260  case CmpInst::FCMP_OGE:
2261    return AArch64CC::GE;
2262  case CmpInst::ICMP_UGT:
2263  case CmpInst::FCMP_UGT:
2264    return AArch64CC::HI;
2265  case CmpInst::FCMP_OLT:
2266    return AArch64CC::MI;
2267  case CmpInst::ICMP_ULE:
2268  case CmpInst::FCMP_OLE:
2269    return AArch64CC::LS;
2270  case CmpInst::FCMP_ORD:
2271    return AArch64CC::VC;
2272  case CmpInst::FCMP_UNO:
2273    return AArch64CC::VS;
2274  case CmpInst::FCMP_UGE:
2275    return AArch64CC::PL;
2276  case CmpInst::ICMP_SLT:
2277  case CmpInst::FCMP_ULT:
2278    return AArch64CC::LT;
2279  case CmpInst::ICMP_SLE:
2280  case CmpInst::FCMP_ULE:
2281    return AArch64CC::LE;
2282  case CmpInst::FCMP_UNE:
2283  case CmpInst::ICMP_NE:
2284    return AArch64CC::NE;
2285  case CmpInst::ICMP_UGE:
2286    return AArch64CC::HS;
2287  case CmpInst::ICMP_ULT:
2288    return AArch64CC::LO;
2289  }
2290}
2291
2292/// Try to emit a combined compare-and-branch instruction.
2293bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2294  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2295  // will not be produced, as they are conditional branch instructions that do
2296  // not set flags.
2297  if (FuncInfo.MF->getFunction().hasFnAttribute(
2298          Attribute::SpeculativeLoadHardening))
2299    return false;
2300
2301  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2302  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2303  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2304
2305  const Value *LHS = CI->getOperand(0);
2306  const Value *RHS = CI->getOperand(1);
2307
2308  MVT VT;
2309  if (!isTypeSupported(LHS->getType(), VT))
2310    return false;
2311
2312  unsigned BW = VT.getSizeInBits();
2313  if (BW > 64)
2314    return false;
2315
2316  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2317  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2318
2319  // Try to take advantage of fallthrough opportunities.
2320  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2321    std::swap(TBB, FBB);
2322    Predicate = CmpInst::getInversePredicate(Predicate);
2323  }
2324
2325  int TestBit = -1;
2326  bool IsCmpNE;
2327  switch (Predicate) {
2328  default:
2329    return false;
2330  case CmpInst::ICMP_EQ:
2331  case CmpInst::ICMP_NE:
2332    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2333      std::swap(LHS, RHS);
2334
2335    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2336      return false;
2337
2338    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2339      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2340        const Value *AndLHS = AI->getOperand(0);
2341        const Value *AndRHS = AI->getOperand(1);
2342
2343        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2344          if (C->getValue().isPowerOf2())
2345            std::swap(AndLHS, AndRHS);
2346
2347        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2348          if (C->getValue().isPowerOf2()) {
2349            TestBit = C->getValue().logBase2();
2350            LHS = AndLHS;
2351          }
2352      }
2353
2354    if (VT == MVT::i1)
2355      TestBit = 0;
2356
2357    IsCmpNE = Predicate == CmpInst::ICMP_NE;
2358    break;
2359  case CmpInst::ICMP_SLT:
2360  case CmpInst::ICMP_SGE:
2361    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2362      return false;
2363
2364    TestBit = BW - 1;
2365    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2366    break;
2367  case CmpInst::ICMP_SGT:
2368  case CmpInst::ICMP_SLE:
2369    if (!isa<ConstantInt>(RHS))
2370      return false;
2371
2372    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2373      return false;
2374
2375    TestBit = BW - 1;
2376    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2377    break;
2378  } // end switch
2379
2380  static const unsigned OpcTable[2][2][2] = {
2381    { {AArch64::CBZW,  AArch64::CBZX },
2382      {AArch64::CBNZW, AArch64::CBNZX} },
2383    { {AArch64::TBZW,  AArch64::TBZX },
2384      {AArch64::TBNZW, AArch64::TBNZX} }
2385  };
2386
2387  bool IsBitTest = TestBit != -1;
2388  bool Is64Bit = BW == 64;
2389  if (TestBit < 32 && TestBit >= 0)
2390    Is64Bit = false;
2391
2392  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2393  const MCInstrDesc &II = TII.get(Opc);
2394
2395  unsigned SrcReg = getRegForValue(LHS);
2396  if (!SrcReg)
2397    return false;
2398  bool SrcIsKill = hasTrivialKill(LHS);
2399
2400  if (BW == 64 && !Is64Bit)
2401    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2402                                        AArch64::sub_32);
2403
2404  if ((BW < 32) && !IsBitTest)
2405    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2406
2407  // Emit the combined compare and branch instruction.
2408  SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2409  MachineInstrBuilder MIB =
2410      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2411          .addReg(SrcReg, getKillRegState(SrcIsKill));
2412  if (IsBitTest)
2413    MIB.addImm(TestBit);
2414  MIB.addMBB(TBB);
2415
2416  finishCondBranch(BI->getParent(), TBB, FBB);
2417  return true;
2418}
2419
2420bool AArch64FastISel::selectBranch(const Instruction *I) {
2421  const BranchInst *BI = cast<BranchInst>(I);
2422  if (BI->isUnconditional()) {
2423    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2424    fastEmitBranch(MSucc, BI->getDebugLoc());
2425    return true;
2426  }
2427
2428  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2429  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2430
2431  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2432    if (CI->hasOneUse() && isValueAvailable(CI)) {
2433      // Try to optimize or fold the cmp.
2434      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2435      switch (Predicate) {
2436      default:
2437        break;
2438      case CmpInst::FCMP_FALSE:
2439        fastEmitBranch(FBB, DbgLoc);
2440        return true;
2441      case CmpInst::FCMP_TRUE:
2442        fastEmitBranch(TBB, DbgLoc);
2443        return true;
2444      }
2445
2446      // Try to emit a combined compare-and-branch first.
2447      if (emitCompareAndBranch(BI))
2448        return true;
2449
2450      // Try to take advantage of fallthrough opportunities.
2451      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2452        std::swap(TBB, FBB);
2453        Predicate = CmpInst::getInversePredicate(Predicate);
2454      }
2455
2456      // Emit the cmp.
2457      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2458        return false;
2459
2460      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2461      // instruction.
2462      AArch64CC::CondCode CC = getCompareCC(Predicate);
2463      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2464      switch (Predicate) {
2465      default:
2466        break;
2467      case CmpInst::FCMP_UEQ:
2468        ExtraCC = AArch64CC::EQ;
2469        CC = AArch64CC::VS;
2470        break;
2471      case CmpInst::FCMP_ONE:
2472        ExtraCC = AArch64CC::MI;
2473        CC = AArch64CC::GT;
2474        break;
2475      }
2476      assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2477
2478      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2479      if (ExtraCC != AArch64CC::AL) {
2480        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481            .addImm(ExtraCC)
2482            .addMBB(TBB);
2483      }
2484
2485      // Emit the branch.
2486      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2487          .addImm(CC)
2488          .addMBB(TBB);
2489
2490      finishCondBranch(BI->getParent(), TBB, FBB);
2491      return true;
2492    }
2493  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2494    uint64_t Imm = CI->getZExtValue();
2495    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2496    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2497        .addMBB(Target);
2498
2499    // Obtain the branch probability and add the target to the successor list.
2500    if (FuncInfo.BPI) {
2501      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2502          BI->getParent(), Target->getBasicBlock());
2503      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2504    } else
2505      FuncInfo.MBB->addSuccessorWithoutProb(Target);
2506    return true;
2507  } else {
2508    AArch64CC::CondCode CC = AArch64CC::NE;
2509    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2510      // Fake request the condition, otherwise the intrinsic might be completely
2511      // optimized away.
2512      unsigned CondReg = getRegForValue(BI->getCondition());
2513      if (!CondReg)
2514        return false;
2515
2516      // Emit the branch.
2517      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2518        .addImm(CC)
2519        .addMBB(TBB);
2520
2521      finishCondBranch(BI->getParent(), TBB, FBB);
2522      return true;
2523    }
2524  }
2525
2526  unsigned CondReg = getRegForValue(BI->getCondition());
2527  if (CondReg == 0)
2528    return false;
2529  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2530
2531  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2532  unsigned Opcode = AArch64::TBNZW;
2533  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2534    std::swap(TBB, FBB);
2535    Opcode = AArch64::TBZW;
2536  }
2537
2538  const MCInstrDesc &II = TII.get(Opcode);
2539  unsigned ConstrainedCondReg
2540    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2541  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2542      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2543      .addImm(0)
2544      .addMBB(TBB);
2545
2546  finishCondBranch(BI->getParent(), TBB, FBB);
2547  return true;
2548}
2549
2550bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2551  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2552  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2553  if (AddrReg == 0)
2554    return false;
2555
2556  // Emit the indirect branch.
2557  const MCInstrDesc &II = TII.get(AArch64::BR);
2558  AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2559  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2560
2561  // Make sure the CFG is up-to-date.
2562  for (auto *Succ : BI->successors())
2563    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2564
2565  return true;
2566}
2567
2568bool AArch64FastISel::selectCmp(const Instruction *I) {
2569  const CmpInst *CI = cast<CmpInst>(I);
2570
2571  // Vectors of i1 are weird: bail out.
2572  if (CI->getType()->isVectorTy())
2573    return false;
2574
2575  // Try to optimize or fold the cmp.
2576  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2577  unsigned ResultReg = 0;
2578  switch (Predicate) {
2579  default:
2580    break;
2581  case CmpInst::FCMP_FALSE:
2582    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2583    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2584            TII.get(TargetOpcode::COPY), ResultReg)
2585        .addReg(AArch64::WZR, getKillRegState(true));
2586    break;
2587  case CmpInst::FCMP_TRUE:
2588    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2589    break;
2590  }
2591
2592  if (ResultReg) {
2593    updateValueMap(I, ResultReg);
2594    return true;
2595  }
2596
2597  // Emit the cmp.
2598  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2599    return false;
2600
2601  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2602
2603  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2604  // condition codes are inverted, because they are used by CSINC.
2605  static unsigned CondCodeTable[2][2] = {
2606    { AArch64CC::NE, AArch64CC::VC },
2607    { AArch64CC::PL, AArch64CC::LE }
2608  };
2609  unsigned *CondCodes = nullptr;
2610  switch (Predicate) {
2611  default:
2612    break;
2613  case CmpInst::FCMP_UEQ:
2614    CondCodes = &CondCodeTable[0][0];
2615    break;
2616  case CmpInst::FCMP_ONE:
2617    CondCodes = &CondCodeTable[1][0];
2618    break;
2619  }
2620
2621  if (CondCodes) {
2622    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2623    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2624            TmpReg1)
2625        .addReg(AArch64::WZR, getKillRegState(true))
2626        .addReg(AArch64::WZR, getKillRegState(true))
2627        .addImm(CondCodes[0]);
2628    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2629            ResultReg)
2630        .addReg(TmpReg1, getKillRegState(true))
2631        .addReg(AArch64::WZR, getKillRegState(true))
2632        .addImm(CondCodes[1]);
2633
2634    updateValueMap(I, ResultReg);
2635    return true;
2636  }
2637
2638  // Now set a register based on the comparison.
2639  AArch64CC::CondCode CC = getCompareCC(Predicate);
2640  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2641  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2642  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2643          ResultReg)
2644      .addReg(AArch64::WZR, getKillRegState(true))
2645      .addReg(AArch64::WZR, getKillRegState(true))
2646      .addImm(invertedCC);
2647
2648  updateValueMap(I, ResultReg);
2649  return true;
2650}
2651
2652/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2653/// value.
2654bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2655  if (!SI->getType()->isIntegerTy(1))
2656    return false;
2657
2658  const Value *Src1Val, *Src2Val;
2659  unsigned Opc = 0;
2660  bool NeedExtraOp = false;
2661  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2662    if (CI->isOne()) {
2663      Src1Val = SI->getCondition();
2664      Src2Val = SI->getFalseValue();
2665      Opc = AArch64::ORRWrr;
2666    } else {
2667      assert(CI->isZero());
2668      Src1Val = SI->getFalseValue();
2669      Src2Val = SI->getCondition();
2670      Opc = AArch64::BICWrr;
2671    }
2672  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2673    if (CI->isOne()) {
2674      Src1Val = SI->getCondition();
2675      Src2Val = SI->getTrueValue();
2676      Opc = AArch64::ORRWrr;
2677      NeedExtraOp = true;
2678    } else {
2679      assert(CI->isZero());
2680      Src1Val = SI->getCondition();
2681      Src2Val = SI->getTrueValue();
2682      Opc = AArch64::ANDWrr;
2683    }
2684  }
2685
2686  if (!Opc)
2687    return false;
2688
2689  unsigned Src1Reg = getRegForValue(Src1Val);
2690  if (!Src1Reg)
2691    return false;
2692  bool Src1IsKill = hasTrivialKill(Src1Val);
2693
2694  unsigned Src2Reg = getRegForValue(Src2Val);
2695  if (!Src2Reg)
2696    return false;
2697  bool Src2IsKill = hasTrivialKill(Src2Val);
2698
2699  if (NeedExtraOp) {
2700    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2701    Src1IsKill = true;
2702  }
2703  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2704                                       Src1IsKill, Src2Reg, Src2IsKill);
2705  updateValueMap(SI, ResultReg);
2706  return true;
2707}
2708
2709bool AArch64FastISel::selectSelect(const Instruction *I) {
2710  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2711  MVT VT;
2712  if (!isTypeSupported(I->getType(), VT))
2713    return false;
2714
2715  unsigned Opc;
2716  const TargetRegisterClass *RC;
2717  switch (VT.SimpleTy) {
2718  default:
2719    return false;
2720  case MVT::i1:
2721  case MVT::i8:
2722  case MVT::i16:
2723  case MVT::i32:
2724    Opc = AArch64::CSELWr;
2725    RC = &AArch64::GPR32RegClass;
2726    break;
2727  case MVT::i64:
2728    Opc = AArch64::CSELXr;
2729    RC = &AArch64::GPR64RegClass;
2730    break;
2731  case MVT::f32:
2732    Opc = AArch64::FCSELSrrr;
2733    RC = &AArch64::FPR32RegClass;
2734    break;
2735  case MVT::f64:
2736    Opc = AArch64::FCSELDrrr;
2737    RC = &AArch64::FPR64RegClass;
2738    break;
2739  }
2740
2741  const SelectInst *SI = cast<SelectInst>(I);
2742  const Value *Cond = SI->getCondition();
2743  AArch64CC::CondCode CC = AArch64CC::NE;
2744  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2745
2746  if (optimizeSelect(SI))
2747    return true;
2748
2749  // Try to pickup the flags, so we don't have to emit another compare.
2750  if (foldXALUIntrinsic(CC, I, Cond)) {
2751    // Fake request the condition to force emission of the XALU intrinsic.
2752    unsigned CondReg = getRegForValue(Cond);
2753    if (!CondReg)
2754      return false;
2755  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2756             isValueAvailable(Cond)) {
2757    const auto *Cmp = cast<CmpInst>(Cond);
2758    // Try to optimize or fold the cmp.
2759    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2760    const Value *FoldSelect = nullptr;
2761    switch (Predicate) {
2762    default:
2763      break;
2764    case CmpInst::FCMP_FALSE:
2765      FoldSelect = SI->getFalseValue();
2766      break;
2767    case CmpInst::FCMP_TRUE:
2768      FoldSelect = SI->getTrueValue();
2769      break;
2770    }
2771
2772    if (FoldSelect) {
2773      unsigned SrcReg = getRegForValue(FoldSelect);
2774      if (!SrcReg)
2775        return false;
2776      unsigned UseReg = lookUpRegForValue(SI);
2777      if (UseReg)
2778        MRI.clearKillFlags(UseReg);
2779
2780      updateValueMap(I, SrcReg);
2781      return true;
2782    }
2783
2784    // Emit the cmp.
2785    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2786      return false;
2787
2788    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2789    CC = getCompareCC(Predicate);
2790    switch (Predicate) {
2791    default:
2792      break;
2793    case CmpInst::FCMP_UEQ:
2794      ExtraCC = AArch64CC::EQ;
2795      CC = AArch64CC::VS;
2796      break;
2797    case CmpInst::FCMP_ONE:
2798      ExtraCC = AArch64CC::MI;
2799      CC = AArch64CC::GT;
2800      break;
2801    }
2802    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2803  } else {
2804    unsigned CondReg = getRegForValue(Cond);
2805    if (!CondReg)
2806      return false;
2807    bool CondIsKill = hasTrivialKill(Cond);
2808
2809    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2810    CondReg = constrainOperandRegClass(II, CondReg, 1);
2811
2812    // Emit a TST instruction (ANDS wzr, reg, #imm).
2813    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2814            AArch64::WZR)
2815        .addReg(CondReg, getKillRegState(CondIsKill))
2816        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2817  }
2818
2819  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2820  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2821
2822  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2823  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2824
2825  if (!Src1Reg || !Src2Reg)
2826    return false;
2827
2828  if (ExtraCC != AArch64CC::AL) {
2829    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2830                               Src2IsKill, ExtraCC);
2831    Src2IsKill = true;
2832  }
2833  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2834                                        Src2IsKill, CC);
2835  updateValueMap(I, ResultReg);
2836  return true;
2837}
2838
2839bool AArch64FastISel::selectFPExt(const Instruction *I) {
2840  Value *V = I->getOperand(0);
2841  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2842    return false;
2843
2844  unsigned Op = getRegForValue(V);
2845  if (Op == 0)
2846    return false;
2847
2848  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2849  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2850          ResultReg).addReg(Op);
2851  updateValueMap(I, ResultReg);
2852  return true;
2853}
2854
2855bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2856  Value *V = I->getOperand(0);
2857  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2858    return false;
2859
2860  unsigned Op = getRegForValue(V);
2861  if (Op == 0)
2862    return false;
2863
2864  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2865  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2866          ResultReg).addReg(Op);
2867  updateValueMap(I, ResultReg);
2868  return true;
2869}
2870
2871// FPToUI and FPToSI
2872bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2873  MVT DestVT;
2874  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2875    return false;
2876
2877  unsigned SrcReg = getRegForValue(I->getOperand(0));
2878  if (SrcReg == 0)
2879    return false;
2880
2881  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2882  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2883    return false;
2884
2885  unsigned Opc;
2886  if (SrcVT == MVT::f64) {
2887    if (Signed)
2888      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2889    else
2890      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2891  } else {
2892    if (Signed)
2893      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2894    else
2895      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2896  }
2897  unsigned ResultReg = createResultReg(
2898      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2899  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2900      .addReg(SrcReg);
2901  updateValueMap(I, ResultReg);
2902  return true;
2903}
2904
2905bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2906  MVT DestVT;
2907  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2908    return false;
2909  // Let regular ISEL handle FP16
2910  if (DestVT == MVT::f16)
2911    return false;
2912
2913  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2914         "Unexpected value type.");
2915
2916  unsigned SrcReg = getRegForValue(I->getOperand(0));
2917  if (!SrcReg)
2918    return false;
2919  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2920
2921  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2922
2923  // Handle sign-extension.
2924  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2925    SrcReg =
2926        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2927    if (!SrcReg)
2928      return false;
2929    SrcIsKill = true;
2930  }
2931
2932  unsigned Opc;
2933  if (SrcVT == MVT::i64) {
2934    if (Signed)
2935      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2936    else
2937      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2938  } else {
2939    if (Signed)
2940      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2941    else
2942      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2943  }
2944
2945  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2946                                      SrcIsKill);
2947  updateValueMap(I, ResultReg);
2948  return true;
2949}
2950
2951bool AArch64FastISel::fastLowerArguments() {
2952  if (!FuncInfo.CanLowerReturn)
2953    return false;
2954
2955  const Function *F = FuncInfo.Fn;
2956  if (F->isVarArg())
2957    return false;
2958
2959  CallingConv::ID CC = F->getCallingConv();
2960  if (CC != CallingConv::C && CC != CallingConv::Swift)
2961    return false;
2962
2963  if (Subtarget->hasCustomCallingConv())
2964    return false;
2965
2966  // Only handle simple cases of up to 8 GPR and FPR each.
2967  unsigned GPRCnt = 0;
2968  unsigned FPRCnt = 0;
2969  for (auto const &Arg : F->args()) {
2970    if (Arg.hasAttribute(Attribute::ByVal) ||
2971        Arg.hasAttribute(Attribute::InReg) ||
2972        Arg.hasAttribute(Attribute::StructRet) ||
2973        Arg.hasAttribute(Attribute::SwiftSelf) ||
2974        Arg.hasAttribute(Attribute::SwiftError) ||
2975        Arg.hasAttribute(Attribute::Nest))
2976      return false;
2977
2978    Type *ArgTy = Arg.getType();
2979    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2980      return false;
2981
2982    EVT ArgVT = TLI.getValueType(DL, ArgTy);
2983    if (!ArgVT.isSimple())
2984      return false;
2985
2986    MVT VT = ArgVT.getSimpleVT().SimpleTy;
2987    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2988      return false;
2989
2990    if (VT.isVector() &&
2991        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2992      return false;
2993
2994    if (VT >= MVT::i1 && VT <= MVT::i64)
2995      ++GPRCnt;
2996    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2997             VT.is128BitVector())
2998      ++FPRCnt;
2999    else
3000      return false;
3001
3002    if (GPRCnt > 8 || FPRCnt > 8)
3003      return false;
3004  }
3005
3006  static const MCPhysReg Registers[6][8] = {
3007    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
3008      AArch64::W5, AArch64::W6, AArch64::W7 },
3009    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
3010      AArch64::X5, AArch64::X6, AArch64::X7 },
3011    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
3012      AArch64::H5, AArch64::H6, AArch64::H7 },
3013    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
3014      AArch64::S5, AArch64::S6, AArch64::S7 },
3015    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
3016      AArch64::D5, AArch64::D6, AArch64::D7 },
3017    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
3018      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
3019  };
3020
3021  unsigned GPRIdx = 0;
3022  unsigned FPRIdx = 0;
3023  for (auto const &Arg : F->args()) {
3024    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3025    unsigned SrcReg;
3026    const TargetRegisterClass *RC;
3027    if (VT >= MVT::i1 && VT <= MVT::i32) {
3028      SrcReg = Registers[0][GPRIdx++];
3029      RC = &AArch64::GPR32RegClass;
3030      VT = MVT::i32;
3031    } else if (VT == MVT::i64) {
3032      SrcReg = Registers[1][GPRIdx++];
3033      RC = &AArch64::GPR64RegClass;
3034    } else if (VT == MVT::f16) {
3035      SrcReg = Registers[2][FPRIdx++];
3036      RC = &AArch64::FPR16RegClass;
3037    } else if (VT ==  MVT::f32) {
3038      SrcReg = Registers[3][FPRIdx++];
3039      RC = &AArch64::FPR32RegClass;
3040    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3041      SrcReg = Registers[4][FPRIdx++];
3042      RC = &AArch64::FPR64RegClass;
3043    } else if (VT.is128BitVector()) {
3044      SrcReg = Registers[5][FPRIdx++];
3045      RC = &AArch64::FPR128RegClass;
3046    } else
3047      llvm_unreachable("Unexpected value type.");
3048
3049    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3050    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3051    // Without this, EmitLiveInCopies may eliminate the livein if its only
3052    // use is a bitcast (which isn't turned into an instruction).
3053    unsigned ResultReg = createResultReg(RC);
3054    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3055            TII.get(TargetOpcode::COPY), ResultReg)
3056        .addReg(DstReg, getKillRegState(true));
3057    updateValueMap(&Arg, ResultReg);
3058  }
3059  return true;
3060}
3061
3062bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3063                                      SmallVectorImpl<MVT> &OutVTs,
3064                                      unsigned &NumBytes) {
3065  CallingConv::ID CC = CLI.CallConv;
3066  SmallVector<CCValAssign, 16> ArgLocs;
3067  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3068  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3069
3070  // Get a count of how many bytes are to be pushed on the stack.
3071  NumBytes = CCInfo.getNextStackOffset();
3072
3073  // Issue CALLSEQ_START
3074  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3075  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3076    .addImm(NumBytes).addImm(0);
3077
3078  // Process the args.
3079  for (CCValAssign &VA : ArgLocs) {
3080    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3081    MVT ArgVT = OutVTs[VA.getValNo()];
3082
3083    unsigned ArgReg = getRegForValue(ArgVal);
3084    if (!ArgReg)
3085      return false;
3086
3087    // Handle arg promotion: SExt, ZExt, AExt.
3088    switch (VA.getLocInfo()) {
3089    case CCValAssign::Full:
3090      break;
3091    case CCValAssign::SExt: {
3092      MVT DestVT = VA.getLocVT();
3093      MVT SrcVT = ArgVT;
3094      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3095      if (!ArgReg)
3096        return false;
3097      break;
3098    }
3099    case CCValAssign::AExt:
3100    // Intentional fall-through.
3101    case CCValAssign::ZExt: {
3102      MVT DestVT = VA.getLocVT();
3103      MVT SrcVT = ArgVT;
3104      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3105      if (!ArgReg)
3106        return false;
3107      break;
3108    }
3109    default:
3110      llvm_unreachable("Unknown arg promotion!");
3111    }
3112
3113    // Now copy/store arg to correct locations.
3114    if (VA.isRegLoc() && !VA.needsCustom()) {
3115      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3116              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3117      CLI.OutRegs.push_back(VA.getLocReg());
3118    } else if (VA.needsCustom()) {
3119      // FIXME: Handle custom args.
3120      return false;
3121    } else {
3122      assert(VA.isMemLoc() && "Assuming store on stack.");
3123
3124      // Don't emit stores for undef values.
3125      if (isa<UndefValue>(ArgVal))
3126        continue;
3127
3128      // Need to store on the stack.
3129      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3130
3131      unsigned BEAlign = 0;
3132      if (ArgSize < 8 && !Subtarget->isLittleEndian())
3133        BEAlign = 8 - ArgSize;
3134
3135      Address Addr;
3136      Addr.setKind(Address::RegBase);
3137      Addr.setReg(AArch64::SP);
3138      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3139
3140      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3141      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3142          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3143          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3144
3145      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3146        return false;
3147    }
3148  }
3149  return true;
3150}
3151
3152bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3153                                 unsigned NumBytes) {
3154  CallingConv::ID CC = CLI.CallConv;
3155
3156  // Issue CALLSEQ_END
3157  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3158  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3159    .addImm(NumBytes).addImm(0);
3160
3161  // Now the return value.
3162  if (RetVT != MVT::isVoid) {
3163    SmallVector<CCValAssign, 16> RVLocs;
3164    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3165    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3166
3167    // Only handle a single return value.
3168    if (RVLocs.size() != 1)
3169      return false;
3170
3171    // Copy all of the result registers out of their specified physreg.
3172    MVT CopyVT = RVLocs[0].getValVT();
3173
3174    // TODO: Handle big-endian results
3175    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3176      return false;
3177
3178    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3179    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3180            TII.get(TargetOpcode::COPY), ResultReg)
3181        .addReg(RVLocs[0].getLocReg());
3182    CLI.InRegs.push_back(RVLocs[0].getLocReg());
3183
3184    CLI.ResultReg = ResultReg;
3185    CLI.NumResultRegs = 1;
3186  }
3187
3188  return true;
3189}
3190
3191bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3192  CallingConv::ID CC  = CLI.CallConv;
3193  bool IsTailCall     = CLI.IsTailCall;
3194  bool IsVarArg       = CLI.IsVarArg;
3195  const Value *Callee = CLI.Callee;
3196  MCSymbol *Symbol = CLI.Symbol;
3197
3198  if (!Callee && !Symbol)
3199    return false;
3200
3201  // Allow SelectionDAG isel to handle tail calls.
3202  if (IsTailCall)
3203    return false;
3204
3205  // FIXME: we could and should support this, but for now correctness at -O0 is
3206  // more important.
3207  if (Subtarget->isTargetILP32())
3208    return false;
3209
3210  CodeModel::Model CM = TM.getCodeModel();
3211  // Only support the small-addressing and large code models.
3212  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3213    return false;
3214
3215  // FIXME: Add large code model support for ELF.
3216  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3217    return false;
3218
3219  // Let SDISel handle vararg functions.
3220  if (IsVarArg)
3221    return false;
3222
3223  // FIXME: Only handle *simple* calls for now.
3224  MVT RetVT;
3225  if (CLI.RetTy->isVoidTy())
3226    RetVT = MVT::isVoid;
3227  else if (!isTypeLegal(CLI.RetTy, RetVT))
3228    return false;
3229
3230  for (auto Flag : CLI.OutFlags)
3231    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3232        Flag.isSwiftSelf() || Flag.isSwiftError())
3233      return false;
3234
3235  // Set up the argument vectors.
3236  SmallVector<MVT, 16> OutVTs;
3237  OutVTs.reserve(CLI.OutVals.size());
3238
3239  for (auto *Val : CLI.OutVals) {
3240    MVT VT;
3241    if (!isTypeLegal(Val->getType(), VT) &&
3242        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3243      return false;
3244
3245    // We don't handle vector parameters yet.
3246    if (VT.isVector() || VT.getSizeInBits() > 64)
3247      return false;
3248
3249    OutVTs.push_back(VT);
3250  }
3251
3252  Address Addr;
3253  if (Callee && !computeCallAddress(Callee, Addr))
3254    return false;
3255
3256  // The weak function target may be zero; in that case we must use indirect
3257  // addressing via a stub on windows as it may be out of range for a
3258  // PC-relative jump.
3259  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3260      Addr.getGlobalValue()->hasExternalWeakLinkage())
3261    return false;
3262
3263  // Handle the arguments now that we've gotten them.
3264  unsigned NumBytes;
3265  if (!processCallArgs(CLI, OutVTs, NumBytes))
3266    return false;
3267
3268  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3269  if (RegInfo->isAnyArgRegReserved(*MF))
3270    RegInfo->emitReservedArgRegCallError(*MF);
3271
3272  // Issue the call.
3273  MachineInstrBuilder MIB;
3274  if (Subtarget->useSmallAddressing()) {
3275    const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3276    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3277    if (Symbol)
3278      MIB.addSym(Symbol, 0);
3279    else if (Addr.getGlobalValue())
3280      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3281    else if (Addr.getReg()) {
3282      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3283      MIB.addReg(Reg);
3284    } else
3285      return false;
3286  } else {
3287    unsigned CallReg = 0;
3288    if (Symbol) {
3289      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3290      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3291              ADRPReg)
3292          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3293
3294      CallReg = createResultReg(&AArch64::GPR64RegClass);
3295      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3296              TII.get(AArch64::LDRXui), CallReg)
3297          .addReg(ADRPReg)
3298          .addSym(Symbol,
3299                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3300    } else if (Addr.getGlobalValue())
3301      CallReg = materializeGV(Addr.getGlobalValue());
3302    else if (Addr.getReg())
3303      CallReg = Addr.getReg();
3304
3305    if (!CallReg)
3306      return false;
3307
3308    const MCInstrDesc &II = TII.get(AArch64::BLR);
3309    CallReg = constrainOperandRegClass(II, CallReg, 0);
3310    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3311  }
3312
3313  // Add implicit physical register uses to the call.
3314  for (auto Reg : CLI.OutRegs)
3315    MIB.addReg(Reg, RegState::Implicit);
3316
3317  // Add a register mask with the call-preserved registers.
3318  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3319  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3320
3321  CLI.Call = MIB;
3322
3323  // Finish off the call including any return values.
3324  return finishCall(CLI, RetVT, NumBytes);
3325}
3326
3327bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3328  if (Alignment)
3329    return Len / Alignment <= 4;
3330  else
3331    return Len < 32;
3332}
3333
3334bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3335                                         uint64_t Len, unsigned Alignment) {
3336  // Make sure we don't bloat code by inlining very large memcpy's.
3337  if (!isMemCpySmall(Len, Alignment))
3338    return false;
3339
3340  int64_t UnscaledOffset = 0;
3341  Address OrigDest = Dest;
3342  Address OrigSrc = Src;
3343
3344  while (Len) {
3345    MVT VT;
3346    if (!Alignment || Alignment >= 8) {
3347      if (Len >= 8)
3348        VT = MVT::i64;
3349      else if (Len >= 4)
3350        VT = MVT::i32;
3351      else if (Len >= 2)
3352        VT = MVT::i16;
3353      else {
3354        VT = MVT::i8;
3355      }
3356    } else {
3357      // Bound based on alignment.
3358      if (Len >= 4 && Alignment == 4)
3359        VT = MVT::i32;
3360      else if (Len >= 2 && Alignment == 2)
3361        VT = MVT::i16;
3362      else {
3363        VT = MVT::i8;
3364      }
3365    }
3366
3367    unsigned ResultReg = emitLoad(VT, VT, Src);
3368    if (!ResultReg)
3369      return false;
3370
3371    if (!emitStore(VT, ResultReg, Dest))
3372      return false;
3373
3374    int64_t Size = VT.getSizeInBits() / 8;
3375    Len -= Size;
3376    UnscaledOffset += Size;
3377
3378    // We need to recompute the unscaled offset for each iteration.
3379    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3380    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3381  }
3382
3383  return true;
3384}
3385
3386/// Check if it is possible to fold the condition from the XALU intrinsic
3387/// into the user. The condition code will only be updated on success.
3388bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3389                                        const Instruction *I,
3390                                        const Value *Cond) {
3391  if (!isa<ExtractValueInst>(Cond))
3392    return false;
3393
3394  const auto *EV = cast<ExtractValueInst>(Cond);
3395  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3396    return false;
3397
3398  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3399  MVT RetVT;
3400  const Function *Callee = II->getCalledFunction();
3401  Type *RetTy =
3402  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3403  if (!isTypeLegal(RetTy, RetVT))
3404    return false;
3405
3406  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3407    return false;
3408
3409  const Value *LHS = II->getArgOperand(0);
3410  const Value *RHS = II->getArgOperand(1);
3411
3412  // Canonicalize immediate to the RHS.
3413  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3414      isCommutativeIntrinsic(II))
3415    std::swap(LHS, RHS);
3416
3417  // Simplify multiplies.
3418  Intrinsic::ID IID = II->getIntrinsicID();
3419  switch (IID) {
3420  default:
3421    break;
3422  case Intrinsic::smul_with_overflow:
3423    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3424      if (C->getValue() == 2)
3425        IID = Intrinsic::sadd_with_overflow;
3426    break;
3427  case Intrinsic::umul_with_overflow:
3428    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3429      if (C->getValue() == 2)
3430        IID = Intrinsic::uadd_with_overflow;
3431    break;
3432  }
3433
3434  AArch64CC::CondCode TmpCC;
3435  switch (IID) {
3436  default:
3437    return false;
3438  case Intrinsic::sadd_with_overflow:
3439  case Intrinsic::ssub_with_overflow:
3440    TmpCC = AArch64CC::VS;
3441    break;
3442  case Intrinsic::uadd_with_overflow:
3443    TmpCC = AArch64CC::HS;
3444    break;
3445  case Intrinsic::usub_with_overflow:
3446    TmpCC = AArch64CC::LO;
3447    break;
3448  case Intrinsic::smul_with_overflow:
3449  case Intrinsic::umul_with_overflow:
3450    TmpCC = AArch64CC::NE;
3451    break;
3452  }
3453
3454  // Check if both instructions are in the same basic block.
3455  if (!isValueAvailable(II))
3456    return false;
3457
3458  // Make sure nothing is in the way
3459  BasicBlock::const_iterator Start(I);
3460  BasicBlock::const_iterator End(II);
3461  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3462    // We only expect extractvalue instructions between the intrinsic and the
3463    // instruction to be selected.
3464    if (!isa<ExtractValueInst>(Itr))
3465      return false;
3466
3467    // Check that the extractvalue operand comes from the intrinsic.
3468    const auto *EVI = cast<ExtractValueInst>(Itr);
3469    if (EVI->getAggregateOperand() != II)
3470      return false;
3471  }
3472
3473  CC = TmpCC;
3474  return true;
3475}
3476
3477bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3478  // FIXME: Handle more intrinsics.
3479  switch (II->getIntrinsicID()) {
3480  default: return false;
3481  case Intrinsic::frameaddress: {
3482    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3483    MFI.setFrameAddressIsTaken(true);
3484
3485    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3486    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3487    Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3488    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3489            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3490    // Recursively load frame address
3491    // ldr x0, [fp]
3492    // ldr x0, [x0]
3493    // ldr x0, [x0]
3494    // ...
3495    unsigned DestReg;
3496    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3497    while (Depth--) {
3498      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3499                                SrcReg, /*IsKill=*/true, 0);
3500      assert(DestReg && "Unexpected LDR instruction emission failure.");
3501      SrcReg = DestReg;
3502    }
3503
3504    updateValueMap(II, SrcReg);
3505    return true;
3506  }
3507  case Intrinsic::sponentry: {
3508    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3509
3510    // SP = FP + Fixed Object + 16
3511    int FI = MFI.CreateFixedObject(4, 0, false);
3512    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3513    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3514            TII.get(AArch64::ADDXri), ResultReg)
3515            .addFrameIndex(FI)
3516            .addImm(0)
3517            .addImm(0);
3518
3519    updateValueMap(II, ResultReg);
3520    return true;
3521  }
3522  case Intrinsic::memcpy:
3523  case Intrinsic::memmove: {
3524    const auto *MTI = cast<MemTransferInst>(II);
3525    // Don't handle volatile.
3526    if (MTI->isVolatile())
3527      return false;
3528
3529    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3530    // we would emit dead code because we don't currently handle memmoves.
3531    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3532    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3533      // Small memcpy's are common enough that we want to do them without a call
3534      // if possible.
3535      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3536      unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3537                                    MTI->getSourceAlignment());
3538      if (isMemCpySmall(Len, Alignment)) {
3539        Address Dest, Src;
3540        if (!computeAddress(MTI->getRawDest(), Dest) ||
3541            !computeAddress(MTI->getRawSource(), Src))
3542          return false;
3543        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3544          return true;
3545      }
3546    }
3547
3548    if (!MTI->getLength()->getType()->isIntegerTy(64))
3549      return false;
3550
3551    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3552      // Fast instruction selection doesn't support the special
3553      // address spaces.
3554      return false;
3555
3556    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3557    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3558  }
3559  case Intrinsic::memset: {
3560    const MemSetInst *MSI = cast<MemSetInst>(II);
3561    // Don't handle volatile.
3562    if (MSI->isVolatile())
3563      return false;
3564
3565    if (!MSI->getLength()->getType()->isIntegerTy(64))
3566      return false;
3567
3568    if (MSI->getDestAddressSpace() > 255)
3569      // Fast instruction selection doesn't support the special
3570      // address spaces.
3571      return false;
3572
3573    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3574  }
3575  case Intrinsic::sin:
3576  case Intrinsic::cos:
3577  case Intrinsic::pow: {
3578    MVT RetVT;
3579    if (!isTypeLegal(II->getType(), RetVT))
3580      return false;
3581
3582    if (RetVT != MVT::f32 && RetVT != MVT::f64)
3583      return false;
3584
3585    static const RTLIB::Libcall LibCallTable[3][2] = {
3586      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3587      { RTLIB::COS_F32, RTLIB::COS_F64 },
3588      { RTLIB::POW_F32, RTLIB::POW_F64 }
3589    };
3590    RTLIB::Libcall LC;
3591    bool Is64Bit = RetVT == MVT::f64;
3592    switch (II->getIntrinsicID()) {
3593    default:
3594      llvm_unreachable("Unexpected intrinsic.");
3595    case Intrinsic::sin:
3596      LC = LibCallTable[0][Is64Bit];
3597      break;
3598    case Intrinsic::cos:
3599      LC = LibCallTable[1][Is64Bit];
3600      break;
3601    case Intrinsic::pow:
3602      LC = LibCallTable[2][Is64Bit];
3603      break;
3604    }
3605
3606    ArgListTy Args;
3607    Args.reserve(II->getNumArgOperands());
3608
3609    // Populate the argument list.
3610    for (auto &Arg : II->arg_operands()) {
3611      ArgListEntry Entry;
3612      Entry.Val = Arg;
3613      Entry.Ty = Arg->getType();
3614      Args.push_back(Entry);
3615    }
3616
3617    CallLoweringInfo CLI;
3618    MCContext &Ctx = MF->getContext();
3619    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3620                  TLI.getLibcallName(LC), std::move(Args));
3621    if (!lowerCallTo(CLI))
3622      return false;
3623    updateValueMap(II, CLI.ResultReg);
3624    return true;
3625  }
3626  case Intrinsic::fabs: {
3627    MVT VT;
3628    if (!isTypeLegal(II->getType(), VT))
3629      return false;
3630
3631    unsigned Opc;
3632    switch (VT.SimpleTy) {
3633    default:
3634      return false;
3635    case MVT::f32:
3636      Opc = AArch64::FABSSr;
3637      break;
3638    case MVT::f64:
3639      Opc = AArch64::FABSDr;
3640      break;
3641    }
3642    unsigned SrcReg = getRegForValue(II->getOperand(0));
3643    if (!SrcReg)
3644      return false;
3645    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3646    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3647    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3648      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3649    updateValueMap(II, ResultReg);
3650    return true;
3651  }
3652  case Intrinsic::trap:
3653    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3654        .addImm(1);
3655    return true;
3656  case Intrinsic::debugtrap: {
3657    if (Subtarget->isTargetWindows()) {
3658      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3659          .addImm(0xF000);
3660      return true;
3661    }
3662    break;
3663  }
3664
3665  case Intrinsic::sqrt: {
3666    Type *RetTy = II->getCalledFunction()->getReturnType();
3667
3668    MVT VT;
3669    if (!isTypeLegal(RetTy, VT))
3670      return false;
3671
3672    unsigned Op0Reg = getRegForValue(II->getOperand(0));
3673    if (!Op0Reg)
3674      return false;
3675    bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3676
3677    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3678    if (!ResultReg)
3679      return false;
3680
3681    updateValueMap(II, ResultReg);
3682    return true;
3683  }
3684  case Intrinsic::sadd_with_overflow:
3685  case Intrinsic::uadd_with_overflow:
3686  case Intrinsic::ssub_with_overflow:
3687  case Intrinsic::usub_with_overflow:
3688  case Intrinsic::smul_with_overflow:
3689  case Intrinsic::umul_with_overflow: {
3690    // This implements the basic lowering of the xalu with overflow intrinsics.
3691    const Function *Callee = II->getCalledFunction();
3692    auto *Ty = cast<StructType>(Callee->getReturnType());
3693    Type *RetTy = Ty->getTypeAtIndex(0U);
3694
3695    MVT VT;
3696    if (!isTypeLegal(RetTy, VT))
3697      return false;
3698
3699    if (VT != MVT::i32 && VT != MVT::i64)
3700      return false;
3701
3702    const Value *LHS = II->getArgOperand(0);
3703    const Value *RHS = II->getArgOperand(1);
3704    // Canonicalize immediate to the RHS.
3705    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3706        isCommutativeIntrinsic(II))
3707      std::swap(LHS, RHS);
3708
3709    // Simplify multiplies.
3710    Intrinsic::ID IID = II->getIntrinsicID();
3711    switch (IID) {
3712    default:
3713      break;
3714    case Intrinsic::smul_with_overflow:
3715      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3716        if (C->getValue() == 2) {
3717          IID = Intrinsic::sadd_with_overflow;
3718          RHS = LHS;
3719        }
3720      break;
3721    case Intrinsic::umul_with_overflow:
3722      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3723        if (C->getValue() == 2) {
3724          IID = Intrinsic::uadd_with_overflow;
3725          RHS = LHS;
3726        }
3727      break;
3728    }
3729
3730    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3731    AArch64CC::CondCode CC = AArch64CC::Invalid;
3732    switch (IID) {
3733    default: llvm_unreachable("Unexpected intrinsic!");
3734    case Intrinsic::sadd_with_overflow:
3735      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3736      CC = AArch64CC::VS;
3737      break;
3738    case Intrinsic::uadd_with_overflow:
3739      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3740      CC = AArch64CC::HS;
3741      break;
3742    case Intrinsic::ssub_with_overflow:
3743      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3744      CC = AArch64CC::VS;
3745      break;
3746    case Intrinsic::usub_with_overflow:
3747      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3748      CC = AArch64CC::LO;
3749      break;
3750    case Intrinsic::smul_with_overflow: {
3751      CC = AArch64CC::NE;
3752      unsigned LHSReg = getRegForValue(LHS);
3753      if (!LHSReg)
3754        return false;
3755      bool LHSIsKill = hasTrivialKill(LHS);
3756
3757      unsigned RHSReg = getRegForValue(RHS);
3758      if (!RHSReg)
3759        return false;
3760      bool RHSIsKill = hasTrivialKill(RHS);
3761
3762      if (VT == MVT::i32) {
3763        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3764        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3765                                       /*IsKill=*/false, 32);
3766        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3767                                            AArch64::sub_32);
3768        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3769                                              AArch64::sub_32);
3770        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3771                    AArch64_AM::ASR, 31, /*WantResult=*/false);
3772      } else {
3773        assert(VT == MVT::i64 && "Unexpected value type.");
3774        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3775        // reused in the next instruction.
3776        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3777                            /*IsKill=*/false);
3778        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3779                                        RHSReg, RHSIsKill);
3780        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3781                    AArch64_AM::ASR, 63, /*WantResult=*/false);
3782      }
3783      break;
3784    }
3785    case Intrinsic::umul_with_overflow: {
3786      CC = AArch64CC::NE;
3787      unsigned LHSReg = getRegForValue(LHS);
3788      if (!LHSReg)
3789        return false;
3790      bool LHSIsKill = hasTrivialKill(LHS);
3791
3792      unsigned RHSReg = getRegForValue(RHS);
3793      if (!RHSReg)
3794        return false;
3795      bool RHSIsKill = hasTrivialKill(RHS);
3796
3797      if (VT == MVT::i32) {
3798        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3799        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3800                    /*IsKill=*/false, AArch64_AM::LSR, 32,
3801                    /*WantResult=*/false);
3802        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3803                                            AArch64::sub_32);
3804      } else {
3805        assert(VT == MVT::i64 && "Unexpected value type.");
3806        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3807        // reused in the next instruction.
3808        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3809                            /*IsKill=*/false);
3810        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3811                                        RHSReg, RHSIsKill);
3812        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3813                    /*IsKill=*/false, /*WantResult=*/false);
3814      }
3815      break;
3816    }
3817    }
3818
3819    if (MulReg) {
3820      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3821      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3822              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3823    }
3824
3825    if (!ResultReg1)
3826      return false;
3827
3828    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3829                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3830                                  /*IsKill=*/true, getInvertedCondCode(CC));
3831    (void)ResultReg2;
3832    assert((ResultReg1 + 1) == ResultReg2 &&
3833           "Nonconsecutive result registers.");
3834    updateValueMap(II, ResultReg1, 2);
3835    return true;
3836  }
3837  }
3838  return false;
3839}
3840
3841bool AArch64FastISel::selectRet(const Instruction *I) {
3842  const ReturnInst *Ret = cast<ReturnInst>(I);
3843  const Function &F = *I->getParent()->getParent();
3844
3845  if (!FuncInfo.CanLowerReturn)
3846    return false;
3847
3848  if (F.isVarArg())
3849    return false;
3850
3851  if (TLI.supportSwiftError() &&
3852      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3853    return false;
3854
3855  if (TLI.supportSplitCSR(FuncInfo.MF))
3856    return false;
3857
3858  // Build a list of return value registers.
3859  SmallVector<unsigned, 4> RetRegs;
3860
3861  if (Ret->getNumOperands() > 0) {
3862    CallingConv::ID CC = F.getCallingConv();
3863    SmallVector<ISD::OutputArg, 4> Outs;
3864    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3865
3866    // Analyze operands of the call, assigning locations to each operand.
3867    SmallVector<CCValAssign, 16> ValLocs;
3868    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3869    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3870                                                     : RetCC_AArch64_AAPCS;
3871    CCInfo.AnalyzeReturn(Outs, RetCC);
3872
3873    // Only handle a single return value for now.
3874    if (ValLocs.size() != 1)
3875      return false;
3876
3877    CCValAssign &VA = ValLocs[0];
3878    const Value *RV = Ret->getOperand(0);
3879
3880    // Don't bother handling odd stuff for now.
3881    if ((VA.getLocInfo() != CCValAssign::Full) &&
3882        (VA.getLocInfo() != CCValAssign::BCvt))
3883      return false;
3884
3885    // Only handle register returns for now.
3886    if (!VA.isRegLoc())
3887      return false;
3888
3889    unsigned Reg = getRegForValue(RV);
3890    if (Reg == 0)
3891      return false;
3892
3893    unsigned SrcReg = Reg + VA.getValNo();
3894    Register DestReg = VA.getLocReg();
3895    // Avoid a cross-class copy. This is very unlikely.
3896    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3897      return false;
3898
3899    EVT RVEVT = TLI.getValueType(DL, RV->getType());
3900    if (!RVEVT.isSimple())
3901      return false;
3902
3903    // Vectors (of > 1 lane) in big endian need tricky handling.
3904    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3905        !Subtarget->isLittleEndian())
3906      return false;
3907
3908    MVT RVVT = RVEVT.getSimpleVT();
3909    if (RVVT == MVT::f128)
3910      return false;
3911
3912    MVT DestVT = VA.getValVT();
3913    // Special handling for extended integers.
3914    if (RVVT != DestVT) {
3915      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3916        return false;
3917
3918      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3919        return false;
3920
3921      bool IsZExt = Outs[0].Flags.isZExt();
3922      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3923      if (SrcReg == 0)
3924        return false;
3925    }
3926
3927    // "Callee" (i.e. value producer) zero extends pointers at function
3928    // boundary.
3929    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3930      SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff);
3931
3932    // Make the copy.
3933    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3934            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3935
3936    // Add register to return instruction.
3937    RetRegs.push_back(VA.getLocReg());
3938  }
3939
3940  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3941                                    TII.get(AArch64::RET_ReallyLR));
3942  for (unsigned RetReg : RetRegs)
3943    MIB.addReg(RetReg, RegState::Implicit);
3944  return true;
3945}
3946
3947bool AArch64FastISel::selectTrunc(const Instruction *I) {
3948  Type *DestTy = I->getType();
3949  Value *Op = I->getOperand(0);
3950  Type *SrcTy = Op->getType();
3951
3952  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3953  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3954  if (!SrcEVT.isSimple())
3955    return false;
3956  if (!DestEVT.isSimple())
3957    return false;
3958
3959  MVT SrcVT = SrcEVT.getSimpleVT();
3960  MVT DestVT = DestEVT.getSimpleVT();
3961
3962  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3963      SrcVT != MVT::i8)
3964    return false;
3965  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3966      DestVT != MVT::i1)
3967    return false;
3968
3969  unsigned SrcReg = getRegForValue(Op);
3970  if (!SrcReg)
3971    return false;
3972  bool SrcIsKill = hasTrivialKill(Op);
3973
3974  // If we're truncating from i64 to a smaller non-legal type then generate an
3975  // AND. Otherwise, we know the high bits are undefined and a truncate only
3976  // generate a COPY. We cannot mark the source register also as result
3977  // register, because this can incorrectly transfer the kill flag onto the
3978  // source register.
3979  unsigned ResultReg;
3980  if (SrcVT == MVT::i64) {
3981    uint64_t Mask = 0;
3982    switch (DestVT.SimpleTy) {
3983    default:
3984      // Trunc i64 to i32 is handled by the target-independent fast-isel.
3985      return false;
3986    case MVT::i1:
3987      Mask = 0x1;
3988      break;
3989    case MVT::i8:
3990      Mask = 0xff;
3991      break;
3992    case MVT::i16:
3993      Mask = 0xffff;
3994      break;
3995    }
3996    // Issue an extract_subreg to get the lower 32-bits.
3997    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3998                                                AArch64::sub_32);
3999    // Create the AND instruction which performs the actual truncation.
4000    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
4001    assert(ResultReg && "Unexpected AND instruction emission failure.");
4002  } else {
4003    ResultReg = createResultReg(&AArch64::GPR32RegClass);
4004    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4005            TII.get(TargetOpcode::COPY), ResultReg)
4006        .addReg(SrcReg, getKillRegState(SrcIsKill));
4007  }
4008
4009  updateValueMap(I, ResultReg);
4010  return true;
4011}
4012
4013unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4014  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4015          DestVT == MVT::i64) &&
4016         "Unexpected value type.");
4017  // Handle i8 and i16 as i32.
4018  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4019    DestVT = MVT::i32;
4020
4021  if (IsZExt) {
4022    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
4023    assert(ResultReg && "Unexpected AND instruction emission failure.");
4024    if (DestVT == MVT::i64) {
4025      // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
4026      // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
4027      Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4028      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4029              TII.get(AArch64::SUBREG_TO_REG), Reg64)
4030          .addImm(0)
4031          .addReg(ResultReg)
4032          .addImm(AArch64::sub_32);
4033      ResultReg = Reg64;
4034    }
4035    return ResultReg;
4036  } else {
4037    if (DestVT == MVT::i64) {
4038      // FIXME: We're SExt i1 to i64.
4039      return 0;
4040    }
4041    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4042                            /*TODO:IsKill=*/false, 0, 0);
4043  }
4044}
4045
4046unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4047                                      unsigned Op1, bool Op1IsKill) {
4048  unsigned Opc, ZReg;
4049  switch (RetVT.SimpleTy) {
4050  default: return 0;
4051  case MVT::i8:
4052  case MVT::i16:
4053  case MVT::i32:
4054    RetVT = MVT::i32;
4055    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4056  case MVT::i64:
4057    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4058  }
4059
4060  const TargetRegisterClass *RC =
4061      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4063                          /*IsKill=*/ZReg, true);
4064}
4065
4066unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4067                                        unsigned Op1, bool Op1IsKill) {
4068  if (RetVT != MVT::i64)
4069    return 0;
4070
4071  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4072                          Op0, Op0IsKill, Op1, Op1IsKill,
4073                          AArch64::XZR, /*IsKill=*/true);
4074}
4075
4076unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4077                                        unsigned Op1, bool Op1IsKill) {
4078  if (RetVT != MVT::i64)
4079    return 0;
4080
4081  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4082                          Op0, Op0IsKill, Op1, Op1IsKill,
4083                          AArch64::XZR, /*IsKill=*/true);
4084}
4085
4086unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4087                                     unsigned Op1Reg, bool Op1IsKill) {
4088  unsigned Opc = 0;
4089  bool NeedTrunc = false;
4090  uint64_t Mask = 0;
4091  switch (RetVT.SimpleTy) {
4092  default: return 0;
4093  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4094  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4095  case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4096  case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4097  }
4098
4099  const TargetRegisterClass *RC =
4100      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4101  if (NeedTrunc) {
4102    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4103    Op1IsKill = true;
4104  }
4105  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4106                                       Op1IsKill);
4107  if (NeedTrunc)
4108    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4109  return ResultReg;
4110}
4111
4112unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4113                                     bool Op0IsKill, uint64_t Shift,
4114                                     bool IsZExt) {
4115  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4116         "Unexpected source/return type pair.");
4117  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4118          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4119         "Unexpected source value type.");
4120  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4121          RetVT == MVT::i64) && "Unexpected return value type.");
4122
4123  bool Is64Bit = (RetVT == MVT::i64);
4124  unsigned RegSize = Is64Bit ? 64 : 32;
4125  unsigned DstBits = RetVT.getSizeInBits();
4126  unsigned SrcBits = SrcVT.getSizeInBits();
4127  const TargetRegisterClass *RC =
4128      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4129
4130  // Just emit a copy for "zero" shifts.
4131  if (Shift == 0) {
4132    if (RetVT == SrcVT) {
4133      unsigned ResultReg = createResultReg(RC);
4134      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4135              TII.get(TargetOpcode::COPY), ResultReg)
4136          .addReg(Op0, getKillRegState(Op0IsKill));
4137      return ResultReg;
4138    } else
4139      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4140  }
4141
4142  // Don't deal with undefined shifts.
4143  if (Shift >= DstBits)
4144    return 0;
4145
4146  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4147  // {S|U}BFM Wd, Wn, #r, #s
4148  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4149
4150  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4151  // %2 = shl i16 %1, 4
4152  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4153  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4154  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4155  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4156
4157  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4158  // %2 = shl i16 %1, 8
4159  // Wd<32+7-24,32-24> = Wn<7:0>
4160  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4161  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4162  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4163
4164  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4165  // %2 = shl i16 %1, 12
4166  // Wd<32+3-20,32-20> = Wn<3:0>
4167  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4168  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4169  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4170
4171  unsigned ImmR = RegSize - Shift;
4172  // Limit the width to the length of the source type.
4173  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4174  static const unsigned OpcTable[2][2] = {
4175    {AArch64::SBFMWri, AArch64::SBFMXri},
4176    {AArch64::UBFMWri, AArch64::UBFMXri}
4177  };
4178  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4179  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4180    Register TmpReg = MRI.createVirtualRegister(RC);
4181    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4182            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4183        .addImm(0)
4184        .addReg(Op0, getKillRegState(Op0IsKill))
4185        .addImm(AArch64::sub_32);
4186    Op0 = TmpReg;
4187    Op0IsKill = true;
4188  }
4189  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4190}
4191
4192unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4193                                     unsigned Op1Reg, bool Op1IsKill) {
4194  unsigned Opc = 0;
4195  bool NeedTrunc = false;
4196  uint64_t Mask = 0;
4197  switch (RetVT.SimpleTy) {
4198  default: return 0;
4199  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4200  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4201  case MVT::i32: Opc = AArch64::LSRVWr; break;
4202  case MVT::i64: Opc = AArch64::LSRVXr; break;
4203  }
4204
4205  const TargetRegisterClass *RC =
4206      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4207  if (NeedTrunc) {
4208    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4209    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4210    Op0IsKill = Op1IsKill = true;
4211  }
4212  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4213                                       Op1IsKill);
4214  if (NeedTrunc)
4215    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4216  return ResultReg;
4217}
4218
4219unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4220                                     bool Op0IsKill, uint64_t Shift,
4221                                     bool IsZExt) {
4222  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4223         "Unexpected source/return type pair.");
4224  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4225          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4226         "Unexpected source value type.");
4227  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4228          RetVT == MVT::i64) && "Unexpected return value type.");
4229
4230  bool Is64Bit = (RetVT == MVT::i64);
4231  unsigned RegSize = Is64Bit ? 64 : 32;
4232  unsigned DstBits = RetVT.getSizeInBits();
4233  unsigned SrcBits = SrcVT.getSizeInBits();
4234  const TargetRegisterClass *RC =
4235      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4236
4237  // Just emit a copy for "zero" shifts.
4238  if (Shift == 0) {
4239    if (RetVT == SrcVT) {
4240      unsigned ResultReg = createResultReg(RC);
4241      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4242              TII.get(TargetOpcode::COPY), ResultReg)
4243      .addReg(Op0, getKillRegState(Op0IsKill));
4244      return ResultReg;
4245    } else
4246      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4247  }
4248
4249  // Don't deal with undefined shifts.
4250  if (Shift >= DstBits)
4251    return 0;
4252
4253  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4254  // {S|U}BFM Wd, Wn, #r, #s
4255  // Wd<s-r:0> = Wn<s:r> when r <= s
4256
4257  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4258  // %2 = lshr i16 %1, 4
4259  // Wd<7-4:0> = Wn<7:4>
4260  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4261  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4262  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4263
4264  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4265  // %2 = lshr i16 %1, 8
4266  // Wd<7-7,0> = Wn<7:7>
4267  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4268  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4269  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4270
4271  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4272  // %2 = lshr i16 %1, 12
4273  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4274  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4275  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4276  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4277
4278  if (Shift >= SrcBits && IsZExt)
4279    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4280
4281  // It is not possible to fold a sign-extend into the LShr instruction. In this
4282  // case emit a sign-extend.
4283  if (!IsZExt) {
4284    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4285    if (!Op0)
4286      return 0;
4287    Op0IsKill = true;
4288    SrcVT = RetVT;
4289    SrcBits = SrcVT.getSizeInBits();
4290    IsZExt = true;
4291  }
4292
4293  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4294  unsigned ImmS = SrcBits - 1;
4295  static const unsigned OpcTable[2][2] = {
4296    {AArch64::SBFMWri, AArch64::SBFMXri},
4297    {AArch64::UBFMWri, AArch64::UBFMXri}
4298  };
4299  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4300  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4301    Register TmpReg = MRI.createVirtualRegister(RC);
4302    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4303            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4304        .addImm(0)
4305        .addReg(Op0, getKillRegState(Op0IsKill))
4306        .addImm(AArch64::sub_32);
4307    Op0 = TmpReg;
4308    Op0IsKill = true;
4309  }
4310  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4311}
4312
4313unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4314                                     unsigned Op1Reg, bool Op1IsKill) {
4315  unsigned Opc = 0;
4316  bool NeedTrunc = false;
4317  uint64_t Mask = 0;
4318  switch (RetVT.SimpleTy) {
4319  default: return 0;
4320  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4321  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4322  case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4323  case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4324  }
4325
4326  const TargetRegisterClass *RC =
4327      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4328  if (NeedTrunc) {
4329    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4330    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4331    Op0IsKill = Op1IsKill = true;
4332  }
4333  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4334                                       Op1IsKill);
4335  if (NeedTrunc)
4336    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4337  return ResultReg;
4338}
4339
4340unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4341                                     bool Op0IsKill, uint64_t Shift,
4342                                     bool IsZExt) {
4343  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4344         "Unexpected source/return type pair.");
4345  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4346          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4347         "Unexpected source value type.");
4348  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4349          RetVT == MVT::i64) && "Unexpected return value type.");
4350
4351  bool Is64Bit = (RetVT == MVT::i64);
4352  unsigned RegSize = Is64Bit ? 64 : 32;
4353  unsigned DstBits = RetVT.getSizeInBits();
4354  unsigned SrcBits = SrcVT.getSizeInBits();
4355  const TargetRegisterClass *RC =
4356      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4357
4358  // Just emit a copy for "zero" shifts.
4359  if (Shift == 0) {
4360    if (RetVT == SrcVT) {
4361      unsigned ResultReg = createResultReg(RC);
4362      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4363              TII.get(TargetOpcode::COPY), ResultReg)
4364      .addReg(Op0, getKillRegState(Op0IsKill));
4365      return ResultReg;
4366    } else
4367      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4368  }
4369
4370  // Don't deal with undefined shifts.
4371  if (Shift >= DstBits)
4372    return 0;
4373
4374  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4375  // {S|U}BFM Wd, Wn, #r, #s
4376  // Wd<s-r:0> = Wn<s:r> when r <= s
4377
4378  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4379  // %2 = ashr i16 %1, 4
4380  // Wd<7-4:0> = Wn<7:4>
4381  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4382  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4383  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4384
4385  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4386  // %2 = ashr i16 %1, 8
4387  // Wd<7-7,0> = Wn<7:7>
4388  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4389  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4390  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4391
4392  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4393  // %2 = ashr i16 %1, 12
4394  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4395  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4396  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4397  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4398
4399  if (Shift >= SrcBits && IsZExt)
4400    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4401
4402  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4403  unsigned ImmS = SrcBits - 1;
4404  static const unsigned OpcTable[2][2] = {
4405    {AArch64::SBFMWri, AArch64::SBFMXri},
4406    {AArch64::UBFMWri, AArch64::UBFMXri}
4407  };
4408  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4409  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4410    Register TmpReg = MRI.createVirtualRegister(RC);
4411    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4412            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4413        .addImm(0)
4414        .addReg(Op0, getKillRegState(Op0IsKill))
4415        .addImm(AArch64::sub_32);
4416    Op0 = TmpReg;
4417    Op0IsKill = true;
4418  }
4419  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4420}
4421
4422unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4423                                     bool IsZExt) {
4424  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4425
4426  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4427  // DestVT are odd things, so test to make sure that they are both types we can
4428  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4429  // bail out to SelectionDAG.
4430  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4431       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4432      ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4433       (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4434    return 0;
4435
4436  unsigned Opc;
4437  unsigned Imm = 0;
4438
4439  switch (SrcVT.SimpleTy) {
4440  default:
4441    return 0;
4442  case MVT::i1:
4443    return emiti1Ext(SrcReg, DestVT, IsZExt);
4444  case MVT::i8:
4445    if (DestVT == MVT::i64)
4446      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4447    else
4448      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4449    Imm = 7;
4450    break;
4451  case MVT::i16:
4452    if (DestVT == MVT::i64)
4453      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4454    else
4455      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4456    Imm = 15;
4457    break;
4458  case MVT::i32:
4459    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4460    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4461    Imm = 31;
4462    break;
4463  }
4464
4465  // Handle i8 and i16 as i32.
4466  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4467    DestVT = MVT::i32;
4468  else if (DestVT == MVT::i64) {
4469    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4470    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4471            TII.get(AArch64::SUBREG_TO_REG), Src64)
4472        .addImm(0)
4473        .addReg(SrcReg)
4474        .addImm(AArch64::sub_32);
4475    SrcReg = Src64;
4476  }
4477
4478  const TargetRegisterClass *RC =
4479      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4480  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4481}
4482
4483static bool isZExtLoad(const MachineInstr *LI) {
4484  switch (LI->getOpcode()) {
4485  default:
4486    return false;
4487  case AArch64::LDURBBi:
4488  case AArch64::LDURHHi:
4489  case AArch64::LDURWi:
4490  case AArch64::LDRBBui:
4491  case AArch64::LDRHHui:
4492  case AArch64::LDRWui:
4493  case AArch64::LDRBBroX:
4494  case AArch64::LDRHHroX:
4495  case AArch64::LDRWroX:
4496  case AArch64::LDRBBroW:
4497  case AArch64::LDRHHroW:
4498  case AArch64::LDRWroW:
4499    return true;
4500  }
4501}
4502
4503static bool isSExtLoad(const MachineInstr *LI) {
4504  switch (LI->getOpcode()) {
4505  default:
4506    return false;
4507  case AArch64::LDURSBWi:
4508  case AArch64::LDURSHWi:
4509  case AArch64::LDURSBXi:
4510  case AArch64::LDURSHXi:
4511  case AArch64::LDURSWi:
4512  case AArch64::LDRSBWui:
4513  case AArch64::LDRSHWui:
4514  case AArch64::LDRSBXui:
4515  case AArch64::LDRSHXui:
4516  case AArch64::LDRSWui:
4517  case AArch64::LDRSBWroX:
4518  case AArch64::LDRSHWroX:
4519  case AArch64::LDRSBXroX:
4520  case AArch64::LDRSHXroX:
4521  case AArch64::LDRSWroX:
4522  case AArch64::LDRSBWroW:
4523  case AArch64::LDRSHWroW:
4524  case AArch64::LDRSBXroW:
4525  case AArch64::LDRSHXroW:
4526  case AArch64::LDRSWroW:
4527    return true;
4528  }
4529}
4530
4531bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4532                                         MVT SrcVT) {
4533  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4534  if (!LI || !LI->hasOneUse())
4535    return false;
4536
4537  // Check if the load instruction has already been selected.
4538  unsigned Reg = lookUpRegForValue(LI);
4539  if (!Reg)
4540    return false;
4541
4542  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4543  if (!MI)
4544    return false;
4545
4546  // Check if the correct load instruction has been emitted - SelectionDAG might
4547  // have emitted a zero-extending load, but we need a sign-extending load.
4548  bool IsZExt = isa<ZExtInst>(I);
4549  const auto *LoadMI = MI;
4550  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4551      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4552    Register LoadReg = MI->getOperand(1).getReg();
4553    LoadMI = MRI.getUniqueVRegDef(LoadReg);
4554    assert(LoadMI && "Expected valid instruction");
4555  }
4556  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4557    return false;
4558
4559  // Nothing to be done.
4560  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4561    updateValueMap(I, Reg);
4562    return true;
4563  }
4564
4565  if (IsZExt) {
4566    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4567    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4568            TII.get(AArch64::SUBREG_TO_REG), Reg64)
4569        .addImm(0)
4570        .addReg(Reg, getKillRegState(true))
4571        .addImm(AArch64::sub_32);
4572    Reg = Reg64;
4573  } else {
4574    assert((MI->getOpcode() == TargetOpcode::COPY &&
4575            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4576           "Expected copy instruction");
4577    Reg = MI->getOperand(1).getReg();
4578    MachineBasicBlock::iterator I(MI);
4579    removeDeadCode(I, std::next(I));
4580  }
4581  updateValueMap(I, Reg);
4582  return true;
4583}
4584
4585bool AArch64FastISel::selectIntExt(const Instruction *I) {
4586  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4587         "Unexpected integer extend instruction.");
4588  MVT RetVT;
4589  MVT SrcVT;
4590  if (!isTypeSupported(I->getType(), RetVT))
4591    return false;
4592
4593  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4594    return false;
4595
4596  // Try to optimize already sign-/zero-extended values from load instructions.
4597  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4598    return true;
4599
4600  unsigned SrcReg = getRegForValue(I->getOperand(0));
4601  if (!SrcReg)
4602    return false;
4603  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4604
4605  // Try to optimize already sign-/zero-extended values from function arguments.
4606  bool IsZExt = isa<ZExtInst>(I);
4607  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4608    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4609      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4610        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4611        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4612                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4613            .addImm(0)
4614            .addReg(SrcReg, getKillRegState(SrcIsKill))
4615            .addImm(AArch64::sub_32);
4616        SrcReg = ResultReg;
4617      }
4618      // Conservatively clear all kill flags from all uses, because we are
4619      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4620      // level. The result of the instruction at IR level might have been
4621      // trivially dead, which is now not longer true.
4622      unsigned UseReg = lookUpRegForValue(I);
4623      if (UseReg)
4624        MRI.clearKillFlags(UseReg);
4625
4626      updateValueMap(I, SrcReg);
4627      return true;
4628    }
4629  }
4630
4631  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4632  if (!ResultReg)
4633    return false;
4634
4635  updateValueMap(I, ResultReg);
4636  return true;
4637}
4638
4639bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4640  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4641  if (!DestEVT.isSimple())
4642    return false;
4643
4644  MVT DestVT = DestEVT.getSimpleVT();
4645  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4646    return false;
4647
4648  unsigned DivOpc;
4649  bool Is64bit = (DestVT == MVT::i64);
4650  switch (ISDOpcode) {
4651  default:
4652    return false;
4653  case ISD::SREM:
4654    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4655    break;
4656  case ISD::UREM:
4657    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4658    break;
4659  }
4660  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4661  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4662  if (!Src0Reg)
4663    return false;
4664  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4665
4666  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4667  if (!Src1Reg)
4668    return false;
4669  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4670
4671  const TargetRegisterClass *RC =
4672      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4673  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4674                                     Src1Reg, /*IsKill=*/false);
4675  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4676  // The remainder is computed as numerator - (quotient * denominator) using the
4677  // MSUB instruction.
4678  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4679                                        Src1Reg, Src1IsKill, Src0Reg,
4680                                        Src0IsKill);
4681  updateValueMap(I, ResultReg);
4682  return true;
4683}
4684
4685bool AArch64FastISel::selectMul(const Instruction *I) {
4686  MVT VT;
4687  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4688    return false;
4689
4690  if (VT.isVector())
4691    return selectBinaryOp(I, ISD::MUL);
4692
4693  const Value *Src0 = I->getOperand(0);
4694  const Value *Src1 = I->getOperand(1);
4695  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4696    if (C->getValue().isPowerOf2())
4697      std::swap(Src0, Src1);
4698
4699  // Try to simplify to a shift instruction.
4700  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4701    if (C->getValue().isPowerOf2()) {
4702      uint64_t ShiftVal = C->getValue().logBase2();
4703      MVT SrcVT = VT;
4704      bool IsZExt = true;
4705      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4706        if (!isIntExtFree(ZExt)) {
4707          MVT VT;
4708          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4709            SrcVT = VT;
4710            IsZExt = true;
4711            Src0 = ZExt->getOperand(0);
4712          }
4713        }
4714      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4715        if (!isIntExtFree(SExt)) {
4716          MVT VT;
4717          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4718            SrcVT = VT;
4719            IsZExt = false;
4720            Src0 = SExt->getOperand(0);
4721          }
4722        }
4723      }
4724
4725      unsigned Src0Reg = getRegForValue(Src0);
4726      if (!Src0Reg)
4727        return false;
4728      bool Src0IsKill = hasTrivialKill(Src0);
4729
4730      unsigned ResultReg =
4731          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4732
4733      if (ResultReg) {
4734        updateValueMap(I, ResultReg);
4735        return true;
4736      }
4737    }
4738
4739  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4740  if (!Src0Reg)
4741    return false;
4742  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4743
4744  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4745  if (!Src1Reg)
4746    return false;
4747  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4748
4749  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4750
4751  if (!ResultReg)
4752    return false;
4753
4754  updateValueMap(I, ResultReg);
4755  return true;
4756}
4757
4758bool AArch64FastISel::selectShift(const Instruction *I) {
4759  MVT RetVT;
4760  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4761    return false;
4762
4763  if (RetVT.isVector())
4764    return selectOperator(I, I->getOpcode());
4765
4766  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4767    unsigned ResultReg = 0;
4768    uint64_t ShiftVal = C->getZExtValue();
4769    MVT SrcVT = RetVT;
4770    bool IsZExt = I->getOpcode() != Instruction::AShr;
4771    const Value *Op0 = I->getOperand(0);
4772    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4773      if (!isIntExtFree(ZExt)) {
4774        MVT TmpVT;
4775        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4776          SrcVT = TmpVT;
4777          IsZExt = true;
4778          Op0 = ZExt->getOperand(0);
4779        }
4780      }
4781    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4782      if (!isIntExtFree(SExt)) {
4783        MVT TmpVT;
4784        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4785          SrcVT = TmpVT;
4786          IsZExt = false;
4787          Op0 = SExt->getOperand(0);
4788        }
4789      }
4790    }
4791
4792    unsigned Op0Reg = getRegForValue(Op0);
4793    if (!Op0Reg)
4794      return false;
4795    bool Op0IsKill = hasTrivialKill(Op0);
4796
4797    switch (I->getOpcode()) {
4798    default: llvm_unreachable("Unexpected instruction.");
4799    case Instruction::Shl:
4800      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4801      break;
4802    case Instruction::AShr:
4803      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4804      break;
4805    case Instruction::LShr:
4806      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4807      break;
4808    }
4809    if (!ResultReg)
4810      return false;
4811
4812    updateValueMap(I, ResultReg);
4813    return true;
4814  }
4815
4816  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4817  if (!Op0Reg)
4818    return false;
4819  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4820
4821  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4822  if (!Op1Reg)
4823    return false;
4824  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4825
4826  unsigned ResultReg = 0;
4827  switch (I->getOpcode()) {
4828  default: llvm_unreachable("Unexpected instruction.");
4829  case Instruction::Shl:
4830    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4831    break;
4832  case Instruction::AShr:
4833    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4834    break;
4835  case Instruction::LShr:
4836    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4837    break;
4838  }
4839
4840  if (!ResultReg)
4841    return false;
4842
4843  updateValueMap(I, ResultReg);
4844  return true;
4845}
4846
4847bool AArch64FastISel::selectBitCast(const Instruction *I) {
4848  MVT RetVT, SrcVT;
4849
4850  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4851    return false;
4852  if (!isTypeLegal(I->getType(), RetVT))
4853    return false;
4854
4855  unsigned Opc;
4856  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4857    Opc = AArch64::FMOVWSr;
4858  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4859    Opc = AArch64::FMOVXDr;
4860  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4861    Opc = AArch64::FMOVSWr;
4862  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4863    Opc = AArch64::FMOVDXr;
4864  else
4865    return false;
4866
4867  const TargetRegisterClass *RC = nullptr;
4868  switch (RetVT.SimpleTy) {
4869  default: llvm_unreachable("Unexpected value type.");
4870  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4871  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4872  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4873  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4874  }
4875  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4876  if (!Op0Reg)
4877    return false;
4878  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4879  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4880
4881  if (!ResultReg)
4882    return false;
4883
4884  updateValueMap(I, ResultReg);
4885  return true;
4886}
4887
4888bool AArch64FastISel::selectFRem(const Instruction *I) {
4889  MVT RetVT;
4890  if (!isTypeLegal(I->getType(), RetVT))
4891    return false;
4892
4893  RTLIB::Libcall LC;
4894  switch (RetVT.SimpleTy) {
4895  default:
4896    return false;
4897  case MVT::f32:
4898    LC = RTLIB::REM_F32;
4899    break;
4900  case MVT::f64:
4901    LC = RTLIB::REM_F64;
4902    break;
4903  }
4904
4905  ArgListTy Args;
4906  Args.reserve(I->getNumOperands());
4907
4908  // Populate the argument list.
4909  for (auto &Arg : I->operands()) {
4910    ArgListEntry Entry;
4911    Entry.Val = Arg;
4912    Entry.Ty = Arg->getType();
4913    Args.push_back(Entry);
4914  }
4915
4916  CallLoweringInfo CLI;
4917  MCContext &Ctx = MF->getContext();
4918  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4919                TLI.getLibcallName(LC), std::move(Args));
4920  if (!lowerCallTo(CLI))
4921    return false;
4922  updateValueMap(I, CLI.ResultReg);
4923  return true;
4924}
4925
4926bool AArch64FastISel::selectSDiv(const Instruction *I) {
4927  MVT VT;
4928  if (!isTypeLegal(I->getType(), VT))
4929    return false;
4930
4931  if (!isa<ConstantInt>(I->getOperand(1)))
4932    return selectBinaryOp(I, ISD::SDIV);
4933
4934  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4935  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4936      !(C.isPowerOf2() || (-C).isPowerOf2()))
4937    return selectBinaryOp(I, ISD::SDIV);
4938
4939  unsigned Lg2 = C.countTrailingZeros();
4940  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4941  if (!Src0Reg)
4942    return false;
4943  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4944
4945  if (cast<BinaryOperator>(I)->isExact()) {
4946    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4947    if (!ResultReg)
4948      return false;
4949    updateValueMap(I, ResultReg);
4950    return true;
4951  }
4952
4953  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4954  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4955  if (!AddReg)
4956    return false;
4957
4958  // (Src0 < 0) ? Pow2 - 1 : 0;
4959  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4960    return false;
4961
4962  unsigned SelectOpc;
4963  const TargetRegisterClass *RC;
4964  if (VT == MVT::i64) {
4965    SelectOpc = AArch64::CSELXr;
4966    RC = &AArch64::GPR64RegClass;
4967  } else {
4968    SelectOpc = AArch64::CSELWr;
4969    RC = &AArch64::GPR32RegClass;
4970  }
4971  unsigned SelectReg =
4972      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4973                       Src0IsKill, AArch64CC::LT);
4974  if (!SelectReg)
4975    return false;
4976
4977  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4978  // negate the result.
4979  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4980  unsigned ResultReg;
4981  if (C.isNegative())
4982    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4983                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4984  else
4985    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4986
4987  if (!ResultReg)
4988    return false;
4989
4990  updateValueMap(I, ResultReg);
4991  return true;
4992}
4993
4994/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4995/// have to duplicate it for AArch64, because otherwise we would fail during the
4996/// sign-extend emission.
4997std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4998  unsigned IdxN = getRegForValue(Idx);
4999  if (IdxN == 0)
5000    // Unhandled operand. Halt "fast" selection and bail.
5001    return std::pair<unsigned, bool>(0, false);
5002
5003  bool IdxNIsKill = hasTrivialKill(Idx);
5004
5005  // If the index is smaller or larger than intptr_t, truncate or extend it.
5006  MVT PtrVT = TLI.getPointerTy(DL);
5007  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
5008  if (IdxVT.bitsLT(PtrVT)) {
5009    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
5010    IdxNIsKill = true;
5011  } else if (IdxVT.bitsGT(PtrVT))
5012    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
5013  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
5014}
5015
5016/// This is mostly a copy of the existing FastISel GEP code, but we have to
5017/// duplicate it for AArch64, because otherwise we would bail out even for
5018/// simple cases. This is because the standard fastEmit functions don't cover
5019/// MUL at all and ADD is lowered very inefficientily.
5020bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
5021  if (Subtarget->isTargetILP32())
5022    return false;
5023
5024  unsigned N = getRegForValue(I->getOperand(0));
5025  if (!N)
5026    return false;
5027  bool NIsKill = hasTrivialKill(I->getOperand(0));
5028
5029  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
5030  // into a single N = N + TotalOffset.
5031  uint64_t TotalOffs = 0;
5032  MVT VT = TLI.getPointerTy(DL);
5033  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
5034       GTI != E; ++GTI) {
5035    const Value *Idx = GTI.getOperand();
5036    if (auto *StTy = GTI.getStructTypeOrNull()) {
5037      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
5038      // N = N + Offset
5039      if (Field)
5040        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
5041    } else {
5042      Type *Ty = GTI.getIndexedType();
5043
5044      // If this is a constant subscript, handle it quickly.
5045      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5046        if (CI->isZero())
5047          continue;
5048        // N = N + Offset
5049        TotalOffs +=
5050            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
5051        continue;
5052      }
5053      if (TotalOffs) {
5054        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5055        if (!N)
5056          return false;
5057        NIsKill = true;
5058        TotalOffs = 0;
5059      }
5060
5061      // N = N + Idx * ElementSize;
5062      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5063      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5064      unsigned IdxN = Pair.first;
5065      bool IdxNIsKill = Pair.second;
5066      if (!IdxN)
5067        return false;
5068
5069      if (ElementSize != 1) {
5070        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5071        if (!C)
5072          return false;
5073        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5074        if (!IdxN)
5075          return false;
5076        IdxNIsKill = true;
5077      }
5078      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5079      if (!N)
5080        return false;
5081    }
5082  }
5083  if (TotalOffs) {
5084    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5085    if (!N)
5086      return false;
5087  }
5088  updateValueMap(I, N);
5089  return true;
5090}
5091
5092bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5093  assert(TM.getOptLevel() == CodeGenOpt::None &&
5094         "cmpxchg survived AtomicExpand at optlevel > -O0");
5095
5096  auto *RetPairTy = cast<StructType>(I->getType());
5097  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5098  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5099         "cmpxchg has a non-i1 status result");
5100
5101  MVT VT;
5102  if (!isTypeLegal(RetTy, VT))
5103    return false;
5104
5105  const TargetRegisterClass *ResRC;
5106  unsigned Opc, CmpOpc;
5107  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5108  // extractvalue selection doesn't support that.
5109  if (VT == MVT::i32) {
5110    Opc = AArch64::CMP_SWAP_32;
5111    CmpOpc = AArch64::SUBSWrs;
5112    ResRC = &AArch64::GPR32RegClass;
5113  } else if (VT == MVT::i64) {
5114    Opc = AArch64::CMP_SWAP_64;
5115    CmpOpc = AArch64::SUBSXrs;
5116    ResRC = &AArch64::GPR64RegClass;
5117  } else {
5118    return false;
5119  }
5120
5121  const MCInstrDesc &II = TII.get(Opc);
5122
5123  const unsigned AddrReg = constrainOperandRegClass(
5124      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5125  const unsigned DesiredReg = constrainOperandRegClass(
5126      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5127  const unsigned NewReg = constrainOperandRegClass(
5128      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5129
5130  const unsigned ResultReg1 = createResultReg(ResRC);
5131  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5132  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5133
5134  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5135  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5136      .addDef(ResultReg1)
5137      .addDef(ScratchReg)
5138      .addUse(AddrReg)
5139      .addUse(DesiredReg)
5140      .addUse(NewReg);
5141
5142  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5143      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5144      .addUse(ResultReg1)
5145      .addUse(DesiredReg)
5146      .addImm(0);
5147
5148  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5149      .addDef(ResultReg2)
5150      .addUse(AArch64::WZR)
5151      .addUse(AArch64::WZR)
5152      .addImm(AArch64CC::NE);
5153
5154  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5155  updateValueMap(I, ResultReg1, 2);
5156  return true;
5157}
5158
5159bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5160  switch (I->getOpcode()) {
5161  default:
5162    break;
5163  case Instruction::Add:
5164  case Instruction::Sub:
5165    return selectAddSub(I);
5166  case Instruction::Mul:
5167    return selectMul(I);
5168  case Instruction::SDiv:
5169    return selectSDiv(I);
5170  case Instruction::SRem:
5171    if (!selectBinaryOp(I, ISD::SREM))
5172      return selectRem(I, ISD::SREM);
5173    return true;
5174  case Instruction::URem:
5175    if (!selectBinaryOp(I, ISD::UREM))
5176      return selectRem(I, ISD::UREM);
5177    return true;
5178  case Instruction::Shl:
5179  case Instruction::LShr:
5180  case Instruction::AShr:
5181    return selectShift(I);
5182  case Instruction::And:
5183  case Instruction::Or:
5184  case Instruction::Xor:
5185    return selectLogicalOp(I);
5186  case Instruction::Br:
5187    return selectBranch(I);
5188  case Instruction::IndirectBr:
5189    return selectIndirectBr(I);
5190  case Instruction::BitCast:
5191    if (!FastISel::selectBitCast(I))
5192      return selectBitCast(I);
5193    return true;
5194  case Instruction::FPToSI:
5195    if (!selectCast(I, ISD::FP_TO_SINT))
5196      return selectFPToInt(I, /*Signed=*/true);
5197    return true;
5198  case Instruction::FPToUI:
5199    return selectFPToInt(I, /*Signed=*/false);
5200  case Instruction::ZExt:
5201  case Instruction::SExt:
5202    return selectIntExt(I);
5203  case Instruction::Trunc:
5204    if (!selectCast(I, ISD::TRUNCATE))
5205      return selectTrunc(I);
5206    return true;
5207  case Instruction::FPExt:
5208    return selectFPExt(I);
5209  case Instruction::FPTrunc:
5210    return selectFPTrunc(I);
5211  case Instruction::SIToFP:
5212    if (!selectCast(I, ISD::SINT_TO_FP))
5213      return selectIntToFP(I, /*Signed=*/true);
5214    return true;
5215  case Instruction::UIToFP:
5216    return selectIntToFP(I, /*Signed=*/false);
5217  case Instruction::Load:
5218    return selectLoad(I);
5219  case Instruction::Store:
5220    return selectStore(I);
5221  case Instruction::FCmp:
5222  case Instruction::ICmp:
5223    return selectCmp(I);
5224  case Instruction::Select:
5225    return selectSelect(I);
5226  case Instruction::Ret:
5227    return selectRet(I);
5228  case Instruction::FRem:
5229    return selectFRem(I);
5230  case Instruction::GetElementPtr:
5231    return selectGetElementPtr(I);
5232  case Instruction::AtomicCmpXchg:
5233    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5234  }
5235
5236  // fall-back to target-independent instruction selection.
5237  return selectOperator(I, I->getOpcode());
5238}
5239
5240namespace llvm {
5241
5242FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5243                                        const TargetLibraryInfo *LibInfo) {
5244  return new AArch64FastISel(FuncInfo, LibInfo);
5245}
5246
5247} // end namespace llvm
5248