//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

namespace {

  // All possible address modes, plus some.
  struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType = RegBase;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset = 0;

    // Innocuous defaults for our address.
    Address() {
      Base.Reg = 0;
    }
  };

class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo)
        : FastISel(funcInfo, libInfo),
          Subtarget(
              &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
          M(const_cast<Module &>(*funcInfo.Fn->getParent())),
          TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
          TLI(*Subtarget->getTargetLowering()) {
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

  private:
    // Code from FastISel.cpp.

    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);
    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned fastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            uint64_t Imm);

    // Backend specific FastISel code.

    bool fastSelectInstruction(const Instruction *I) override;
    unsigned fastMaterializeConstant(const Constant *C) override;
    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;

  #include "ARMGenFastISel.inc"

    // Instruction selection routines.

    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.

    bool isPositionIndependent() const;
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                               unsigned Alignment);
    unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned ARMMaterializeInt(const Constant *C, MVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);

    const TargetLowering *getTargetLowering() { return &TLI; }

    // Call handling routines.

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<Register> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<Register> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    unsigned getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.

    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(MVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              MachineMemOperand::Flags Flags, bool useAM3);
};

} // end anonymous namespace

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If this isn't a NEON instruction, or we're in a Thumb2 function,
  // predication is handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
       AFI->isThumb2Function())
    return MI->isPredicable();

  for (const MCOperandInfo &opInfo : MCID.operands())
    if (opInfo.isPredicate())
      return true;

  return false;
}

// If the machine instruction is predicable, add the predicate operands; if it
// needs default CC operands, add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? Or are we a NEON instruction in ARM mode with a
  // predicate operand? In the latter case the instruction isn't technically
  // predicable, but add the predicate operand anyway.
  if (isARMNEONPred(MI))
    MIB.add(predOps(ARMCC::AL));

  // Do we optionally set a predicate?  CPSR is true iff the optional def is
  // CPSR; all other optional defs in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR))
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}

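// The fastEmitInst_* helpers below mirror the generic FastISel versions but
// route every emitted instruction through AddOptionalDefs so the predicate
// and optional CC-def operands that ARM instructions expect are always added.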
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
                            ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                   TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operands are sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  Op1 = constrainOperandRegClass(II, Op1, 2);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
            .addReg(Op0, Op0IsKill * RegState::Kill)
            .addReg(Op1, Op1IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
            .addReg(Op0, Op0IsKill * RegState::Kill)
            .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
                            ResultReg).addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), DestReg).addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2Base()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = DL.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx)
          .addReg(0));
  return DestReg;
}

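// Materialize an integer constant into a register, preferring a single
// MOVi16 or MVN when the value can be encoded, then movw/movt via fastEmit_i,
// and finally a constant-pool load for plain 32-bit values.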
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
      &ARM::GPRRegClass;
    unsigned ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                                 &ARM::GPRRegClass;
      unsigned ImmReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  unsigned ResultReg = 0;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = DL.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::t2LDRpci), ResultReg)
                      .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::LDRcp), ResultReg)
                      .addConstantPoolIndex(Idx)
                      .addImm(0));
  }
  return ResultReg;
}

bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}

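// Materialize the address of a global, using movw/movt when the subtarget
// allows it and a constant-pool (optionally PIC) load otherwise. Indirect
// symbols get an extra load to resolve the real address.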
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return 0;

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible; it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = DL.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = DL.getTypeAllocSize(GV->getType());
    }

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, Align, VT);

    // Grab index.
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          DbgLoc, TII.get(Opc), NewDestReg)
                                  .addReg(DestReg)
                                  .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if (IsIndirect) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(ARM::LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

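// Dispatch constant materialization by constant kind (FP, global, integer).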
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);

    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(SI->second)
                            .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

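// Fold the address into a form the load/store encoding can take: if the
// immediate offset does not fit the addressing mode for VT, materialize the
// frame index and/or reg+offset into a base register with a zero offset.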
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(Addr.Base.FI)
                            .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction,
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}

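// Append the base (frame index or register) and offset operands for a
// load/store being built, using the addrmode3 reg+imm8 form when useAM3 is
// set, and attach a memory operand for frame-index accesses.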
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       MachineMemOperand::Flags Flags,
                                       bool useAM3) {
  // The selection DAG divides addrmode5 offsets by 4 and multiplies them back
  // later, so do the same division here.
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}

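// Emit a load of VT from Addr into ResultReg, returning false for types or
// alignments the fast path cannot handle. Unaligned f32 loads are done as an
// integer load followed by a VMOVSR into the FP register.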
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64.
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned loads need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load.  Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  Register ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}

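// Emit a store of SrcReg to Addr, returning false for types or alignments the
// fast path cannot handle. i1 values are masked down to a single bit first,
// and unaligned f32 stores go through a VMOVRS plus an integer store.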
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
                                              : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
      LLVM_FALLTHROUGH;
    }
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64.
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}

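// Map an IR comparison predicate onto the ARM condition code tested after the
// compare. ARMCC::AL is returned for predicates that would need two compares
// and is treated as "unsupported" by the callers.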
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
          .addReg(CmpReg)
          .addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}

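// Lower an indirect branch to a BX/tBRIND on the address register and record
// all listed successors.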
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  assert(isThumb2 || Subtarget->hasV4TOps());

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                          TII.get(Opc)).addReg(AddrReg));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);

  return true;
}

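// Emit an integer or floating-point compare of the two values, using an
// immediate form when the RHS constant can be encoded; FP compares are
// followed by FMSTAT so CPSR holds the result either way.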
1350bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
1351                             bool isZExt) {
1352  Type *Ty = Src1Value->getType();
1353  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
1354  if (!SrcEVT.isSimple()) return false;
1355  MVT SrcVT = SrcEVT.getSimpleVT();
1356
1357  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
1358    return false;
1359
1360  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
1361    return false;
1362
1363  // Check to see if the 2nd operand is a constant that we can encode directly
1364  // in the compare.
1365  int Imm = 0;
1366  bool UseImm = false;
1367  bool isNegativeImm = false;
1368  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
1369  // Thus, Src1Value may be a ConstantInt, but we're missing it.
1370  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1371    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
1372        SrcVT == MVT::i1) {
1373      const APInt &CIVal = ConstInt->getValue();
1374      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
1375      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1376      // then a cmn, because there is no way to represent 2147483648 as a
1377      // signed 32-bit int.
1378      if (Imm < 0 && Imm != (int)0x80000000) {
1379        isNegativeImm = true;
1380        Imm = -Imm;
1381      }
1382      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1383        (ARM_AM::getSOImmVal(Imm) != -1);
1384    }
1385  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1386    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1387      if (ConstFP->isZero() && !ConstFP->isNegative())
1388        UseImm = true;
1389  }
1390
1391  unsigned CmpOpc;
1392  bool isICmp = true;
1393  bool needsExt = false;
1394  switch (SrcVT.SimpleTy) {
1395    default: return false;
1396    // TODO: Verify compares.
1397    case MVT::f32:
1398      isICmp = false;
1399      CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
1400      break;
1401    case MVT::f64:
1402      isICmp = false;
1403      CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
1404      break;
1405    case MVT::i1:
1406    case MVT::i8:
1407    case MVT::i16:
1408      needsExt = true;
1409      LLVM_FALLTHROUGH;
1410    case MVT::i32:
1411      if (isThumb2) {
1412        if (!UseImm)
1413          CmpOpc = ARM::t2CMPrr;
1414        else
1415          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
1416      } else {
1417        if (!UseImm)
1418          CmpOpc = ARM::CMPrr;
1419        else
1420          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
1421      }
1422      break;
1423  }
1424
1425  unsigned SrcReg1 = getRegForValue(Src1Value);
1426  if (SrcReg1 == 0) return false;
1427
1428  unsigned SrcReg2 = 0;
1429  if (!UseImm) {
1430    SrcReg2 = getRegForValue(Src2Value);
1431    if (SrcReg2 == 0) return false;
1432  }
1433
1434  // We have i1, i8, or i16, we need to either zero extend or sign extend.
1435  if (needsExt) {
1436    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1437    if (SrcReg1 == 0) return false;
1438    if (!UseImm) {
1439      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1440      if (SrcReg2 == 0) return false;
1441    }
1442  }
1443
1444  const MCInstrDesc &II = TII.get(CmpOpc);
1445  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
1446  if (!UseImm) {
1447    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
1448    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1449                    .addReg(SrcReg1).addReg(SrcReg2));
1450  } else {
1451    MachineInstrBuilder MIB;
1452    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1453      .addReg(SrcReg1);
1454
1455    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1456    if (isICmp)
1457      MIB.addImm(Imm);
1458    AddOptionalDefs(MIB);
1459  }
1460
1461  // For floating point we need to move the result to a comparison register
1462  // that we can then use for branches.
1463  if (Ty->isFloatTy() || Ty->isDoubleTy())
1464    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1465                            TII.get(ARM::FMSTAT)));
1466  return true;
1467}
1468
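// Select an integer or floating-point compare and materialize its boolean
// result in a GPR. The emitted sequence is roughly a cmp/vcmp followed by a
// predicated move of 1 over a materialized zero, e.g. for a signed i32
// less-than:
//   cmp   r0, r1
//   mov   rD, #0
//   movlt rD, #1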
1469bool ARMFastISel::SelectCmp(const Instruction *I) {
1470  const CmpInst *CI = cast<CmpInst>(I);
1471
1472  // Get the compare predicate.
1473  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1474
1475  // We may not handle every CC for now.
1476  if (ARMPred == ARMCC::AL) return false;
1477
1478  // Emit the compare.
1479  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1480    return false;
1481
1482  // Now set a register based on the comparison. Explicitly set the predicates
1483  // here.
1484  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1485  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
1486                                           : &ARM::GPRRegClass;
1487  unsigned DestReg = createResultReg(RC);
1488  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1489  unsigned ZeroReg = fastMaterializeConstant(Zero);
  // ARMEmitCmp emits an FMSTAT when necessary, so it's always safe to use
  // CPSR.
1491  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
1492          .addReg(ZeroReg).addImm(1)
1493          .addImm(ARMPred).addReg(ARM::CPSR);
1494
1495  updateValueMap(I, DestReg);
1496  return true;
1497}
1498
1499bool ARMFastISel::SelectFPExt(const Instruction *I) {
1500  // Make sure we have VFP and that we're extending float to double.
1501  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1502
1503  Value *V = I->getOperand(0);
1504  if (!I->getType()->isDoubleTy() ||
1505      !V->getType()->isFloatTy()) return false;
1506
1507  unsigned Op = getRegForValue(V);
1508  if (Op == 0) return false;
1509
1510  unsigned Result = createResultReg(&ARM::DPRRegClass);
1511  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1512                          TII.get(ARM::VCVTDS), Result)
1513                  .addReg(Op));
1514  updateValueMap(I, Result);
1515  return true;
1516}
1517
1518bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1519  // Make sure we have VFP and that we're truncating double to float.
1520  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1521
1522  Value *V = I->getOperand(0);
1523  if (!(I->getType()->isFloatTy() &&
1524        V->getType()->isDoubleTy())) return false;
1525
1526  unsigned Op = getRegForValue(V);
1527  if (Op == 0) return false;
1528
1529  unsigned Result = createResultReg(&ARM::SPRRegClass);
1530  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1531                          TII.get(ARM::VCVTSD), Result)
1532                  .addReg(Op));
1533  updateValueMap(I, Result);
1534  return true;
1535}
1536
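// Lower sitofp/uitofp. The integer operand is extended to i32 if needed,
// moved into an FP register, and converted there (roughly a VMOV into an
// S register followed by VSITO*/VUITO*).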
1537bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
1538  // Make sure we have VFP.
1539  if (!Subtarget->hasVFP2Base()) return false;
1540
1541  MVT DstVT;
1542  Type *Ty = I->getType();
1543  if (!isTypeLegal(Ty, DstVT))
1544    return false;
1545
1546  Value *Src = I->getOperand(0);
1547  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1548  if (!SrcEVT.isSimple())
1549    return false;
1550  MVT SrcVT = SrcEVT.getSimpleVT();
1551  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1552    return false;
1553
1554  unsigned SrcReg = getRegForValue(Src);
1555  if (SrcReg == 0) return false;
1556
  // Sign- or zero-extend sub-i32 sources to i32, depending on signedness.
1558  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
1559    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
1560                                       /*isZExt*/!isSigned);
1561    if (SrcReg == 0) return false;
1562  }
1563
  // The conversion routine works on fp-reg to fp-reg, and the operand above
  // was an integer, so move it to the fp registers if possible.
1566  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
1567  if (FP == 0) return false;
1568
1569  unsigned Opc;
1570  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
1571  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
1572    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
1573  else return false;
1574
1575  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1576  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1577                          TII.get(Opc), ResultReg).addReg(FP));
1578  updateValueMap(I, ResultReg);
1579  return true;
1580}
1581
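// Lower fptosi/fptoui. The conversion itself happens in FP registers
// (VTOSIZ*/VTOUIZ*), so the result is moved back to a GPR afterwards.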
1582bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
1583  // Make sure we have VFP.
1584  if (!Subtarget->hasVFP2Base()) return false;
1585
1586  MVT DstVT;
1587  Type *RetTy = I->getType();
1588  if (!isTypeLegal(RetTy, DstVT))
1589    return false;
1590
1591  unsigned Op = getRegForValue(I->getOperand(0));
1592  if (Op == 0) return false;
1593
1594  unsigned Opc;
1595  Type *OpTy = I->getOperand(0)->getType();
1596  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1597  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
1598    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1599  else return false;
1600
1601  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1602  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1603  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1604                          TII.get(Opc), ResultReg).addReg(Op));
1605
1606  // This result needs to be in an integer register, but the conversion only
1607  // takes place in fp-regs.
1608  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1609  if (IntReg == 0) return false;
1610
1611  updateValueMap(I, IntReg);
1612  return true;
1613}
1614
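// Lower a select of two i32 values. The condition is tested against bit 0
// and the result is produced with a predicated move, roughly:
//   tst   rCond, #1
//   movne rDest, rTrue
// where rDest starts out holding the false value. When the false operand is
// an encodable constant it is folded into a MOVCCi/MVNCCi immediate instead.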
1615bool ARMFastISel::SelectSelect(const Instruction *I) {
1616  MVT VT;
1617  if (!isTypeLegal(I->getType(), VT))
1618    return false;
1619
1620  // Things need to be register sized for register moves.
1621  if (VT != MVT::i32) return false;
1622
1623  unsigned CondReg = getRegForValue(I->getOperand(0));
1624  if (CondReg == 0) return false;
1625  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1626  if (Op1Reg == 0) return false;
1627
1628  // Check to see if we can use an immediate in the conditional move.
1629  int Imm = 0;
1630  bool UseImm = false;
1631  bool isNegativeImm = false;
1632  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
1633    assert(VT == MVT::i32 && "Expecting an i32.");
1634    Imm = (int)ConstInt->getValue().getZExtValue();
1635    if (Imm < 0) {
1636      isNegativeImm = true;
1637      Imm = ~Imm;
1638    }
1639    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1640      (ARM_AM::getSOImmVal(Imm) != -1);
1641  }
1642
1643  unsigned Op2Reg = 0;
1644  if (!UseImm) {
1645    Op2Reg = getRegForValue(I->getOperand(2));
1646    if (Op2Reg == 0) return false;
1647  }
1648
1649  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1650  CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
1651  AddOptionalDefs(
1652      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
1653          .addReg(CondReg)
1654          .addImm(1));
1655
1656  unsigned MovCCOpc;
1657  const TargetRegisterClass *RC;
1658  if (!UseImm) {
1659    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
1660    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1661  } else {
1662    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
1663    if (!isNegativeImm)
1664      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1665    else
1666      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1667  }
1668  unsigned ResultReg = createResultReg(RC);
1669  if (!UseImm) {
1670    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
1671    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
1672    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1673            ResultReg)
1674        .addReg(Op2Reg)
1675        .addReg(Op1Reg)
1676        .addImm(ARMCC::NE)
1677        .addReg(ARM::CPSR);
1678  } else {
1679    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
1680    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1681            ResultReg)
1682        .addReg(Op1Reg)
1683        .addImm(Imm)
1684        .addImm(ARMCC::EQ)
1685        .addReg(ARM::CPSR);
1686  }
1687  updateValueMap(I, ResultReg);
1688  return true;
1689}
1690
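// Lower integer division. If the subtarget has a hardware divide the normal
// selectors are expected to handle it; otherwise emit a runtime-library call.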
1691bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1692  MVT VT;
1693  Type *Ty = I->getType();
1694  if (!isTypeLegal(Ty, VT))
1695    return false;
1696
  // If we have integer div support we should have selected this automagically.
  // In case of a real miss, go ahead and return false and we'll pick
  // it up later.
1700  if (Subtarget->hasDivideInThumbMode())
1701    return false;
1702
1703  // Otherwise emit a libcall.
1704  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1705  if (VT == MVT::i8)
1706    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1707  else if (VT == MVT::i16)
1708    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1709  else if (VT == MVT::i32)
1710    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1711  else if (VT == MVT::i64)
1712    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1713  else if (VT == MVT::i128)
1714    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1715  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1716
1717  return ARMEmitLibcall(I, LC);
1718}
1719
1720bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1721  MVT VT;
1722  Type *Ty = I->getType();
1723  if (!isTypeLegal(Ty, VT))
1724    return false;
1725
1726  // Many ABIs do not provide a libcall for standalone remainder, so we need to
1727  // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1728  // multi-reg returns, we'll have to bail out.
1729  if (!TLI.hasStandaloneRem(VT)) {
1730    return false;
1731  }
1732
1733  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1734  if (VT == MVT::i8)
1735    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1736  else if (VT == MVT::i16)
1737    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1738  else if (VT == MVT::i32)
1739    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1740  else if (VT == MVT::i64)
1741    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1742  else if (VT == MVT::i128)
1743    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1744  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1745
1746  return ARMEmitLibcall(I, LC);
1747}
1748
1749bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1750  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1751
  // We can get here when we have a binary operation on a non-legal type and
  // the target-independent selector doesn't know how to handle it.
1754  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1755    return false;
1756
1757  unsigned Opc;
1758  switch (ISDOpcode) {
1759    default: return false;
1760    case ISD::ADD:
1761      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1762      break;
1763    case ISD::OR:
1764      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1765      break;
1766    case ISD::SUB:
1767      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1768      break;
1769  }
1770
1771  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1772  if (SrcReg1 == 0) return false;
1773
  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
1776  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1777  if (SrcReg2 == 0) return false;
1778
1779  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
1780  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
1781  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
1782  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1783                          TII.get(Opc), ResultReg)
1784                  .addReg(SrcReg1).addReg(SrcReg2));
1785  updateValueMap(I, ResultReg);
1786  return true;
1787}
1788
1789bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
1790  EVT FPVT = TLI.getValueType(DL, I->getType(), true);
1791  if (!FPVT.isSimple()) return false;
1792  MVT VT = FPVT.getSimpleVT();
1793
1794  // FIXME: Support vector types where possible.
1795  if (VT.isVector())
1796    return false;
1797
1798  // We can get here in the case when we want to use NEON for our fp
1799  // operations, but can't figure out how to. Just use the vfp instructions
1800  // if we have them.
1801  // FIXME: It'd be nice to use NEON instructions.
1802  Type *Ty = I->getType();
1803  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
1804    return false;
1805  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
1806    return false;
1807
1808  unsigned Opc;
1809  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1810  switch (ISDOpcode) {
1811    default: return false;
1812    case ISD::FADD:
1813      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1814      break;
1815    case ISD::FSUB:
1816      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1817      break;
1818    case ISD::FMUL:
1819      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1820      break;
1821  }
1822  unsigned Op1 = getRegForValue(I->getOperand(0));
1823  if (Op1 == 0) return false;
1824
1825  unsigned Op2 = getRegForValue(I->getOperand(1));
1826  if (Op2 == 0) return false;
1827
1828  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
1829  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1830                          TII.get(Opc), ResultReg)
1831                  .addReg(Op1).addReg(Op2));
1832  updateValueMap(I, ResultReg);
1833  return true;
1834}
1835
1836// Call Handling Code
1837
1838// This is largely taken directly from CCAssignFnForNode
1839// TODO: We may not support all of this.
1840CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
1841                                           bool Return,
1842                                           bool isVarArg) {
1843  switch (CC) {
1844  default:
1845    report_fatal_error("Unsupported calling convention");
1846  case CallingConv::Fast:
1847    if (Subtarget->hasVFP2Base() && !isVarArg) {
1848      if (!Subtarget->isAAPCS_ABI())
1849        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use the VFP variant of the calling
      // convention.
1851      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1852    }
1853    LLVM_FALLTHROUGH;
1854  case CallingConv::C:
1855  case CallingConv::CXX_FAST_TLS:
1856    // Use target triple & subtarget features to do actual dispatch.
1857    if (Subtarget->isAAPCS_ABI()) {
1858      if (Subtarget->hasVFP2Base() &&
1859          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
1860        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1861      else
1862        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1863    } else {
1864      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1865    }
1866  case CallingConv::ARM_AAPCS_VFP:
1867  case CallingConv::Swift:
1868    if (!isVarArg)
1869      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
    // Fall through to the soft-float variant; variadic functions don't
    // use the hard floating point ABI.
1872    LLVM_FALLTHROUGH;
1873  case CallingConv::ARM_AAPCS:
1874    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1875  case CallingConv::ARM_APCS:
1876    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1877  case CallingConv::GHC:
1878    if (Return)
1879      report_fatal_error("Can't return in GHC call convention");
1880    else
1881      return CC_ARM_APCS_GHC;
1882  case CallingConv::CFGuard_Check:
1883    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
1884  }
1885}
1886
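// Analyze the outgoing arguments against the calling convention, emit
// CALLSEQ_START, and place each argument: register arguments are copied (with
// any needed extension or bitcast) into their assigned physical registers,
// f64 values assigned to a GPR pair are split with VMOVRRD, and stack
// arguments are stored at their SP-relative offsets.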
1887bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1888                                  SmallVectorImpl<Register> &ArgRegs,
1889                                  SmallVectorImpl<MVT> &ArgVTs,
1890                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1891                                  SmallVectorImpl<Register> &RegArgs,
1892                                  CallingConv::ID CC,
1893                                  unsigned &NumBytes,
1894                                  bool isVarArg) {
1895  SmallVector<CCValAssign, 16> ArgLocs;
1896  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
1897  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
1898                             CCAssignFnForCall(CC, false, isVarArg));
1899
1900  // Check that we can handle all of the arguments. If we can't, then bail out
1901  // now before we add code to the MBB.
1902  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1903    CCValAssign &VA = ArgLocs[i];
1904    MVT ArgVT = ArgVTs[VA.getValNo()];
1905
1906    // We don't handle NEON/vector parameters yet.
1907    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1908      return false;
1909
1910    // Now copy/store arg to correct locations.
1911    if (VA.isRegLoc() && !VA.needsCustom()) {
1912      continue;
1913    } else if (VA.needsCustom()) {
1914      // TODO: We need custom lowering for vector (v2f64) args.
1915      if (VA.getLocVT() != MVT::f64 ||
1916          // TODO: Only handle register args for now.
1917          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
1918        return false;
1919    } else {
1920      switch (ArgVT.SimpleTy) {
1921      default:
1922        return false;
1923      case MVT::i1:
1924      case MVT::i8:
1925      case MVT::i16:
1926      case MVT::i32:
1927        break;
1928      case MVT::f32:
1929        if (!Subtarget->hasVFP2Base())
1930          return false;
1931        break;
1932      case MVT::f64:
1933        if (!Subtarget->hasVFP2Base())
1934          return false;
1935        break;
1936      }
1937    }
1938  }
1939
  // At this point, we are able to handle the call's arguments in fast isel.
1941
1942  // Get a count of how many bytes are to be pushed on the stack.
1943  NumBytes = CCInfo.getNextStackOffset();
1944
1945  // Issue CALLSEQ_START
1946  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1947  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1948                          TII.get(AdjStackDown))
1949                  .addImm(NumBytes).addImm(0));
1950
1951  // Process the args.
1952  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1953    CCValAssign &VA = ArgLocs[i];
1954    const Value *ArgVal = Args[VA.getValNo()];
1955    Register Arg = ArgRegs[VA.getValNo()];
1956    MVT ArgVT = ArgVTs[VA.getValNo()];
1957
1958    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
1959           "We don't handle NEON/vector parameters yet.");
1960
1961    // Handle arg promotion, etc.
1962    switch (VA.getLocInfo()) {
1963      case CCValAssign::Full: break;
1964      case CCValAssign::SExt: {
1965        MVT DestVT = VA.getLocVT();
1966        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
1967        assert(Arg != 0 && "Failed to emit a sext");
1968        ArgVT = DestVT;
1969        break;
1970      }
1971      case CCValAssign::AExt:
1972      // Intentional fall-through.  Handle AExt and ZExt.
1973      case CCValAssign::ZExt: {
1974        MVT DestVT = VA.getLocVT();
1975        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
1976        assert(Arg != 0 && "Failed to emit a zext");
1977        ArgVT = DestVT;
1978        break;
1979      }
1980      case CCValAssign::BCvt: {
1981        unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1982                                 /*TODO: Kill=*/false);
1983        assert(BC != 0 && "Failed to emit a bitcast!");
1984        Arg = BC;
1985        ArgVT = VA.getLocVT();
1986        break;
1987      }
1988      default: llvm_unreachable("Unknown arg promotion!");
1989    }
1990
1991    // Now copy/store arg to correct locations.
1992    if (VA.isRegLoc() && !VA.needsCustom()) {
1993      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1994              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
1995      RegArgs.push_back(VA.getLocReg());
1996    } else if (VA.needsCustom()) {
1997      // TODO: We need custom lowering for vector (v2f64) args.
1998      assert(VA.getLocVT() == MVT::f64 &&
1999             "Custom lowering for v2f64 args not available");
2000
2001      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
2002      CCValAssign &NextVA = ArgLocs[++i];
2003
2004      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2005             "We only handle register args!");
2006
2007      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2008                              TII.get(ARM::VMOVRRD), VA.getLocReg())
2009                      .addReg(NextVA.getLocReg(), RegState::Define)
2010                      .addReg(Arg));
2011      RegArgs.push_back(VA.getLocReg());
2012      RegArgs.push_back(NextVA.getLocReg());
2013    } else {
2014      assert(VA.isMemLoc());
2015      // Need to store on the stack.
2016
2017      // Don't emit stores for undef values.
2018      if (isa<UndefValue>(ArgVal))
2019        continue;
2020
2021      Address Addr;
2022      Addr.BaseType = Address::RegBase;
2023      Addr.Base.Reg = ARM::SP;
2024      Addr.Offset = VA.getLocMemOffset();
2025
2026      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
2027      assert(EmitRet && "Could not emit a store for argument!");
2028    }
2029  }
2030
2031  return true;
2032}
2033
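// Emit CALLSEQ_END and copy any return value out of its physical register(s)
// into a virtual register, recombining a GPR pair into a double via VMOVDRR
// when needed.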
2034bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
2035                             const Instruction *I, CallingConv::ID CC,
2036                             unsigned &NumBytes, bool isVarArg) {
2037  // Issue CALLSEQ_END
2038  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2039  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2040                          TII.get(AdjStackUp))
2041                  .addImm(NumBytes).addImm(0));
2042
2043  // Now the return value.
2044  if (RetVT != MVT::isVoid) {
2045    SmallVector<CCValAssign, 16> RVLocs;
2046    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
2047    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2048
2049    // Copy all of the result registers out of their specified physreg.
2050    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
2051      // For this move we copy into two registers and then move into the
2052      // double fp reg we want.
2053      MVT DestVT = RVLocs[0].getValVT();
2054      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
2055      Register ResultReg = createResultReg(DstRC);
2056      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2057                              TII.get(ARM::VMOVDRR), ResultReg)
2058                      .addReg(RVLocs[0].getLocReg())
2059                      .addReg(RVLocs[1].getLocReg()));
2060
2061      UsedRegs.push_back(RVLocs[0].getLocReg());
2062      UsedRegs.push_back(RVLocs[1].getLocReg());
2063
2064      // Finally update the result.
2065      updateValueMap(I, ResultReg);
2066    } else {
      assert(RVLocs.size() == 1 &&
             "Can't handle non-double multi-reg retvals!");
2068      MVT CopyVT = RVLocs[0].getValVT();
2069
2070      // Special handling for extended integers.
2071      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
2072        CopyVT = MVT::i32;
2073
2074      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
2075
2076      Register ResultReg = createResultReg(DstRC);
2077      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2078              TII.get(TargetOpcode::COPY),
2079              ResultReg).addReg(RVLocs[0].getLocReg());
2080      UsedRegs.push_back(RVLocs[0].getLocReg());
2081
2082      // Finally update the result.
2083      updateValueMap(I, ResultReg);
2084    }
2085  }
2086
2087  return true;
2088}
2089
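// Lower a return. Only a single, register-assigned return value is handled;
// narrow integers are extended to i32 first when the ABI requires it.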
2090bool ARMFastISel::SelectRet(const Instruction *I) {
2091  const ReturnInst *Ret = cast<ReturnInst>(I);
2092  const Function &F = *I->getParent()->getParent();
2093
2094  if (!FuncInfo.CanLowerReturn)
2095    return false;
2096
2097  if (TLI.supportSwiftError() &&
2098      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
2099    return false;
2100
2101  if (TLI.supportSplitCSR(FuncInfo.MF))
2102    return false;
2103
2104  // Build a list of return value registers.
2105  SmallVector<unsigned, 4> RetRegs;
2106
2107  CallingConv::ID CC = F.getCallingConv();
2108  if (Ret->getNumOperands() > 0) {
2109    SmallVector<ISD::OutputArg, 4> Outs;
2110    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
2111
2112    // Analyze operands of the call, assigning locations to each operand.
2113    SmallVector<CCValAssign, 16> ValLocs;
2114    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2115    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
2116                                                 F.isVarArg()));
2117
2118    const Value *RV = Ret->getOperand(0);
2119    unsigned Reg = getRegForValue(RV);
2120    if (Reg == 0)
2121      return false;
2122
2123    // Only handle a single return value for now.
2124    if (ValLocs.size() != 1)
2125      return false;
2126
2127    CCValAssign &VA = ValLocs[0];
2128
2129    // Don't bother handling odd stuff for now.
2130    if (VA.getLocInfo() != CCValAssign::Full)
2131      return false;
2132    // Only handle register returns for now.
2133    if (!VA.isRegLoc())
2134      return false;
2135
2136    unsigned SrcReg = Reg + VA.getValNo();
2137    EVT RVEVT = TLI.getValueType(DL, RV->getType());
2138    if (!RVEVT.isSimple()) return false;
2139    MVT RVVT = RVEVT.getSimpleVT();
2140    MVT DestVT = VA.getValVT();
2141    // Special handling for extended integers.
2142    if (RVVT != DestVT) {
2143      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2144        return false;
2145
2146      assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2147
2148      // Perform extension if flagged as either zext or sext.  Otherwise, do
2149      // nothing.
2150      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
2151        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
2152        if (SrcReg == 0) return false;
2153      }
2154    }
2155
2156    // Make the copy.
2157    Register DstReg = VA.getLocReg();
2158    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
2159    // Avoid a cross-class copy. This is very unlikely.
2160    if (!SrcRC->contains(DstReg))
2161      return false;
2162    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2163            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
2164
2165    // Add register to return instruction.
2166    RetRegs.push_back(VA.getLocReg());
2167  }
2168
2169  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2170                                    TII.get(Subtarget->getReturnOpcode()));
2171  AddOptionalDefs(MIB);
2172  for (unsigned R : RetRegs)
2173    MIB.addReg(R, RegState::Implicit);
2174  return true;
2175}
2176
2177unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2178  if (UseReg)
2179    return isThumb2 ? ARM::tBLXr : ARM::BLX;
2180  else
2181    return isThumb2 ? ARM::tBL : ARM::BL;
2182}
2183
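// Materialize the address of a named (libcall) symbol into a register so it
// can be used as the target of an indirect call when long calls are enabled.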
2184unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
2185  // Manually compute the global's type to avoid building it when unnecessary.
2186  Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
2187  EVT LCREVT = TLI.getValueType(DL, GVTy);
2188  if (!LCREVT.isSimple()) return 0;
2189
2190  GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
2191                                       GlobalValue::ExternalLinkage, nullptr,
2192                                       Name);
2193  assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
2194  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
2195}
2196
// A quick function that will emit a call for a named libcall, passing the
// operands of the Instruction I as arguments. We can assume that we can emit
// a call for any libcall we can produce. This is an abridged version of the
// full call infrastructure since we won't need to worry about things like
// computed function pointers or strange arguments at call sites.
2202// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2203// with X86.
2204bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2205  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2206
2207  // Handle *simple* calls for now.
2208  Type *RetTy = I->getType();
2209  MVT RetVT;
2210  if (RetTy->isVoidTy())
2211    RetVT = MVT::isVoid;
2212  else if (!isTypeLegal(RetTy, RetVT))
2213    return false;
2214
2215  // Can't handle non-double multi-reg retvals.
2216  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
2217    SmallVector<CCValAssign, 16> RVLocs;
2218    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2219    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
2220    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2221      return false;
2222  }
2223
2224  // Set up the argument vectors.
2225  SmallVector<Value*, 8> Args;
2226  SmallVector<Register, 8> ArgRegs;
2227  SmallVector<MVT, 8> ArgVTs;
2228  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2229  Args.reserve(I->getNumOperands());
2230  ArgRegs.reserve(I->getNumOperands());
2231  ArgVTs.reserve(I->getNumOperands());
2232  ArgFlags.reserve(I->getNumOperands());
2233  for (Value *Op :  I->operands()) {
2234    unsigned Arg = getRegForValue(Op);
2235    if (Arg == 0) return false;
2236
2237    Type *ArgTy = Op->getType();
2238    MVT ArgVT;
2239    if (!isTypeLegal(ArgTy, ArgVT)) return false;
2240
2241    ISD::ArgFlagsTy Flags;
2242    Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy)));
2243
2244    Args.push_back(Op);
2245    ArgRegs.push_back(Arg);
2246    ArgVTs.push_back(ArgVT);
2247    ArgFlags.push_back(Flags);
2248  }
2249
2250  // Handle the arguments now that we've gotten them.
2251  SmallVector<Register, 4> RegArgs;
2252  unsigned NumBytes;
2253  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2254                       RegArgs, CC, NumBytes, false))
2255    return false;
2256
2257  Register CalleeReg;
2258  if (Subtarget->genLongCalls()) {
2259    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
2260    if (CalleeReg == 0) return false;
2261  }
2262
2263  // Issue the call.
2264  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
2265  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2266                                    DbgLoc, TII.get(CallOpc));
2267  // BL / BLX don't take a predicate, but tBL / tBLX do.
2268  if (isThumb2)
2269    MIB.add(predOps(ARMCC::AL));
2270  if (Subtarget->genLongCalls())
2271    MIB.addReg(CalleeReg);
2272  else
2273    MIB.addExternalSymbol(TLI.getLibcallName(Call));
2274
2275  // Add implicit physical register uses to the call.
2276  for (Register R : RegArgs)
2277    MIB.addReg(R, RegState::Implicit);
2278
2279  // Add a register mask with the call-preserved registers.
2280  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2281  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
2282
2283  // Finish off the call including any return values.
2284  SmallVector<Register, 4> UsedRegs;
2285  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
2286
2287  // Set all unused physreg defs as dead.
2288  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2289
2290  return true;
2291}
2292
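// Lower a plain call, or a memcpy/memmove/memset intrinsic that is being
// turned into a library call (in which case IntrMemName names the routine
// and the trailing intrinsic argument is dropped).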
2293bool ARMFastISel::SelectCall(const Instruction *I,
2294                             const char *IntrMemName = nullptr) {
2295  const CallInst *CI = cast<CallInst>(I);
2296  const Value *Callee = CI->getCalledValue();
2297
2298  // Can't handle inline asm.
2299  if (isa<InlineAsm>(Callee)) return false;
2300
2301  // Allow SelectionDAG isel to handle tail calls.
2302  if (CI->isTailCall()) return false;
2303
2304  // Check the calling convention.
2305  ImmutableCallSite CS(CI);
2306  CallingConv::ID CC = CS.getCallingConv();
2307
2308  // TODO: Avoid some calling conventions?
2309
2310  FunctionType *FTy = CS.getFunctionType();
2311  bool isVarArg = FTy->isVarArg();
2312
2313  // Handle *simple* calls for now.
2314  Type *RetTy = I->getType();
2315  MVT RetVT;
2316  if (RetTy->isVoidTy())
2317    RetVT = MVT::isVoid;
2318  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
2319           RetVT != MVT::i8  && RetVT != MVT::i1)
2320    return false;
2321
2322  // Can't handle non-double multi-reg retvals.
2323  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
2324      RetVT != MVT::i16 && RetVT != MVT::i32) {
2325    SmallVector<CCValAssign, 16> RVLocs;
2326    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
2327    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2328    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2329      return false;
2330  }
2331
2332  // Set up the argument vectors.
2333  SmallVector<Value*, 8> Args;
2334  SmallVector<Register, 8> ArgRegs;
2335  SmallVector<MVT, 8> ArgVTs;
2336  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2337  unsigned arg_size = CS.arg_size();
2338  Args.reserve(arg_size);
2339  ArgRegs.reserve(arg_size);
2340  ArgVTs.reserve(arg_size);
2341  ArgFlags.reserve(arg_size);
2342  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2343       i != e; ++i) {
2344    // If we're lowering a memory intrinsic instead of a regular call, skip the
2345    // last argument, which shouldn't be passed to the underlying function.
2346    if (IntrMemName && e - i <= 1)
2347      break;
2348
2349    ISD::ArgFlagsTy Flags;
2350    unsigned ArgIdx = i - CS.arg_begin();
2351    if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
2352      Flags.setSExt();
2353    if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
2354      Flags.setZExt();
2355
2356    // FIXME: Only handle *easy* calls for now.
2357    if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
2358        CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
2359        CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
2360        CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
2361        CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
2362        CS.paramHasAttr(ArgIdx, Attribute::ByVal))
2363      return false;
2364
2365    Type *ArgTy = (*i)->getType();
2366    MVT ArgVT;
2367    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2368        ArgVT != MVT::i1)
2369      return false;
2370
2371    Register Arg = getRegForValue(*i);
2372    if (!Arg.isValid())
2373      return false;
2374
2375    Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy)));
2376
2377    Args.push_back(*i);
2378    ArgRegs.push_back(Arg);
2379    ArgVTs.push_back(ArgVT);
2380    ArgFlags.push_back(Flags);
2381  }
2382
2383  // Handle the arguments now that we've gotten them.
2384  SmallVector<Register, 4> RegArgs;
2385  unsigned NumBytes;
2386  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2387                       RegArgs, CC, NumBytes, isVarArg))
2388    return false;
2389
2390  bool UseReg = false;
2391  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
2392  if (!GV || Subtarget->genLongCalls()) UseReg = true;
2393
2394  Register CalleeReg;
2395  if (UseReg) {
2396    if (IntrMemName)
2397      CalleeReg = getLibcallReg(IntrMemName);
2398    else
2399      CalleeReg = getRegForValue(Callee);
2400
2401    if (CalleeReg == 0) return false;
2402  }
2403
2404  // Issue the call.
2405  unsigned CallOpc = ARMSelectCallOp(UseReg);
2406  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2407                                    DbgLoc, TII.get(CallOpc));
2408
2409  // ARM calls don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
2411    MIB.add(predOps(ARMCC::AL));
2412  if (UseReg)
2413    MIB.addReg(CalleeReg);
2414  else if (!IntrMemName)
2415    MIB.addGlobalAddress(GV, 0, 0);
2416  else
2417    MIB.addExternalSymbol(IntrMemName, 0);
2418
2419  // Add implicit physical register uses to the call.
2420  for (Register R : RegArgs)
2421    MIB.addReg(R, RegState::Implicit);
2422
2423  // Add a register mask with the call-preserved registers.
2424  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2425  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
2426
2427  // Finish off the call including any return values.
2428  SmallVector<Register, 4> UsedRegs;
2429  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
2430    return false;
2431
2432  // Set all unused physreg defs as dead.
2433  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2434
2435  return true;
2436}
2437
2438bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2439  return Len <= 16;
2440}
2441
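// Expand a small memcpy inline as a sequence of load/store pairs, using the
// widest access (i32, i16, or i8) that the remaining length and the given
// alignment allow.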
2442bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
2443                                        uint64_t Len, unsigned Alignment) {
2444  // Make sure we don't bloat code by inlining very large memcpy's.
2445  if (!ARMIsMemCpySmall(Len))
2446    return false;
2447
2448  while (Len) {
2449    MVT VT;
2450    if (!Alignment || Alignment >= 4) {
2451      if (Len >= 4)
2452        VT = MVT::i32;
2453      else if (Len >= 2)
2454        VT = MVT::i16;
2455      else {
2456        assert(Len == 1 && "Expected a length of 1!");
2457        VT = MVT::i8;
2458      }
2459    } else {
2460      // Bound based on alignment.
2461      if (Len >= 2 && Alignment == 2)
2462        VT = MVT::i16;
2463      else {
2464        VT = MVT::i8;
2465      }
2466    }
2467
2468    bool RV;
2469    Register ResultReg;
2470    RV = ARMEmitLoad(VT, ResultReg, Src);
2471    assert(RV && "Should be able to handle this load.");
2472    RV = ARMEmitStore(VT, ResultReg, Dest);
2473    assert(RV && "Should be able to handle this store.");
2474    (void)RV;
2475
2476    unsigned Size = VT.getSizeInBits()/8;
2477    Len -= Size;
2478    Dest.Offset += Size;
2479    Src.Offset += Size;
2480  }
2481
2482  return true;
2483}
2484
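// Lower the subset of intrinsics we support: frameaddress, small or
// call-lowered memcpy/memmove/memset, and trap.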
2485bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2486  // FIXME: Handle more intrinsics.
2487  switch (I.getIntrinsicID()) {
2488  default: return false;
2489  case Intrinsic::frameaddress: {
2490    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
2491    MFI.setFrameAddressIsTaken(true);
2492
2493    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
2494    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
2495                                             : &ARM::GPRRegClass;
2496
2497    const ARMBaseRegisterInfo *RegInfo =
2498        static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
2499    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2500    unsigned SrcReg = FramePtr;
2501
2502    // Recursively load frame address
2503    // ldr r0 [fp]
2504    // ldr r0 [r0]
2505    // ldr r0 [r0]
2506    // ...
2507    unsigned DestReg;
2508    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
2509    while (Depth--) {
2510      DestReg = createResultReg(RC);
2511      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2512                              TII.get(LdrOpc), DestReg)
2513                      .addReg(SrcReg).addImm(0));
2514      SrcReg = DestReg;
2515    }
2516    updateValueMap(&I, SrcReg);
2517    return true;
2518  }
2519  case Intrinsic::memcpy:
2520  case Intrinsic::memmove: {
2521    const MemTransferInst &MTI = cast<MemTransferInst>(I);
2522    // Don't handle volatile.
2523    if (MTI.isVolatile())
2524      return false;
2525
2526    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2527    // we would emit dead code because we don't currently handle memmoves.
2528    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2529    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
2530      // Small memcpy's are common enough that we want to do them without a call
2531      // if possible.
2532      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
2533      if (ARMIsMemCpySmall(Len)) {
2534        Address Dest, Src;
2535        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
2536            !ARMComputeAddress(MTI.getRawSource(), Src))
2537          return false;
2538        unsigned Alignment = MinAlign(MTI.getDestAlignment(),
2539                                      MTI.getSourceAlignment());
2540        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2541          return true;
2542      }
2543    }
2544
2545    if (!MTI.getLength()->getType()->isIntegerTy(32))
2546      return false;
2547
2548    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
2549      return false;
2550
2551    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
2552    return SelectCall(&I, IntrMemName);
2553  }
2554  case Intrinsic::memset: {
2555    const MemSetInst &MSI = cast<MemSetInst>(I);
2556    // Don't handle volatile.
2557    if (MSI.isVolatile())
2558      return false;
2559
2560    if (!MSI.getLength()->getType()->isIntegerTy(32))
2561      return false;
2562
2563    if (MSI.getDestAddressSpace() > 255)
2564      return false;
2565
2566    return SelectCall(&I, "memset");
2567  }
2568  case Intrinsic::trap: {
2569    unsigned Opcode;
2570    if (Subtarget->isThumb())
2571      Opcode = ARM::tTRAP;
2572    else
2573      Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
2574    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode));
2575    return true;
2576  }
2577  }
2578}
2579
2580bool ARMFastISel::SelectTrunc(const Instruction *I) {
2581  // The high bits for a type smaller than the register size are assumed to be
2582  // undefined.
2583  Value *Op = I->getOperand(0);
2584
2585  EVT SrcVT, DestVT;
2586  SrcVT = TLI.getValueType(DL, Op->getType(), true);
2587  DestVT = TLI.getValueType(DL, I->getType(), true);
2588
2589  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2590    return false;
2591  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2592    return false;
2593
2594  unsigned SrcReg = getRegForValue(Op);
2595  if (!SrcReg) return false;
2596
2597  // Because the high bits are undefined, a truncate doesn't generate
2598  // any code.
2599  updateValueMap(I, SrcReg);
2600  return true;
2601}
2602
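// Emit a zero or sign extension from SrcVT (i1/i8/i16) to DestVT. Depending
// on the subtarget this is either a single extend/mask instruction or a
// shift-left/shift-right pair, e.g. sign-extending an i8 on a pre-v6 ARM
// target is emitted roughly as:
//   lsl rD, rS, #24
//   asr rD, rD, #24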
2603unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2604                                    bool isZExt) {
2605  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2606    return 0;
2607  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
2608    return 0;
2609
2610  // Table of which combinations can be emitted as a single instruction,
2611  // and which will require two.
2612  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
2613    //            ARM                     Thumb
2614    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
2615    //    ext:     s  z      s  z          s  z      s  z
2616    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
2617    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
2618    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
2619  };
2620
  // Target registers:
  //  - For ARM they can never be PC.
  //  - For 16-bit Thumb they are restricted to the lower 8 registers.
  //  - For 32-bit Thumb they are restricted to non-SP and non-PC.
2625  static const TargetRegisterClass *RCTbl[2][2] = {
2626    // Instructions: Two                     Single
2627    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
2628    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
2629  };
2630
2631  // Table governing the instruction(s) to be emitted.
2632  static const struct InstructionTable {
2633    uint32_t Opc   : 16;
    uint32_t hasS  :  1; // Some instructions have an S bit; always set it to 0.
2635    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
2636    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
2637  } IT[2][2][3][2] = {
2638    { // Two instructions (first is left shift, second is in this table).
2639      { // ARM                Opc           S  Shift             Imm
2640        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  31 },
2641        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  31 } },
2642        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  24 },
2643        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  24 } },
2644        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  16 },
2645        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  16 } }
2646      },
2647      { // Thumb              Opc           S  Shift             Imm
2648        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  31 },
2649        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  31 } },
2650        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  24 },
2651        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  24 } },
2652        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  16 },
2653        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  16 } }
2654      }
2655    },
2656    { // Single instruction.
2657      { // ARM                Opc           S  Shift             Imm
2658        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
2659        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
2660        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
2661        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
2662        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
2663        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
2664      },
2665      { // Thumb              Opc           S  Shift             Imm
2666        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
2667        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
2668        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
2669        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
2670        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
2671        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
2672      }
2673    }
2674  };
2675
2676  unsigned SrcBits = SrcVT.getSizeInBits();
2677  unsigned DestBits = DestVT.getSizeInBits();
2678  (void) DestBits;
2679  assert((SrcBits < DestBits) && "can only extend to larger types");
2680  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
2681         "other sizes unimplemented");
2682  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
2683         "other sizes unimplemented");
2684
2685  bool hasV6Ops = Subtarget->hasV6Ops();
2686  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
2687  assert((Bitness < 3) && "sanity-check table bounds");
2688
2689  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
2690  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
2691  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
2692  unsigned Opc = ITP->Opc;
2693  assert(ARM::KILL != Opc && "Invalid table entry");
2694  unsigned hasS = ITP->hasS;
2695  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
2696  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
2697         "only MOVsi has shift operand addressing mode");
2698  unsigned Imm = ITP->Imm;
2699
2700  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
2701  bool setsCPSR = &ARM::tGPRRegClass == RC;
2702  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
2703  unsigned ResultReg;
2704  // MOVsi encodes shift and immediate in shift operand addressing mode.
  // When a two-instruction sequence is emitted, both instructions are shifts,
  // so the following condition holds for both of them.
2707  bool ImmIsSO = (Shift != ARM_AM::no_shift);
2708
2709  // Either one or two instructions are emitted.
2710  // They're always of the form:
2711  //   dst = in OP imm
2712  // CPSR is set only by 16-bit Thumb instructions.
2713  // Predicate, if any, is AL.
2714  // S bit, if available, is always 0.
  // When two are emitted, the first's result feeds the second's input;
  // that value is then dead.
2717  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
2718  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
2719    ResultReg = createResultReg(RC);
2720    bool isLsl = (0 == Instr) && !isSingleInstr;
2721    unsigned Opcode = isLsl ? LSLOpc : Opc;
2722    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
2723    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
2724    bool isKill = 1 == Instr;
2725    MachineInstrBuilder MIB = BuildMI(
2726        *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
2727    if (setsCPSR)
2728      MIB.addReg(ARM::CPSR, RegState::Define);
2729    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
2730    MIB.addReg(SrcReg, isKill * RegState::Kill)
2731        .addImm(ImmEnc)
2732        .add(predOps(ARMCC::AL));
2733    if (hasS)
2734      MIB.add(condCodeOp());
2735    // Second instruction consumes the first's result.
2736    SrcReg = ResultReg;
2737  }
2738
2739  return ResultReg;
2740}
2741
2742bool ARMFastISel::SelectIntExt(const Instruction *I) {
2743  // On ARM, in general, integer casts don't involve legal types; this code
2744  // handles promotable integers.
2745  Type *DestTy = I->getType();
2746  Value *Src = I->getOperand(0);
2747  Type *SrcTy = Src->getType();
2748
2749  bool isZExt = isa<ZExtInst>(I);
2750  unsigned SrcReg = getRegForValue(Src);
2751  if (!SrcReg) return false;
2752
2753  EVT SrcEVT, DestEVT;
2754  SrcEVT = TLI.getValueType(DL, SrcTy, true);
2755  DestEVT = TLI.getValueType(DL, DestTy, true);
2756  if (!SrcEVT.isSimple()) return false;
2757  if (!DestEVT.isSimple()) return false;
2758
2759  MVT SrcVT = SrcEVT.getSimpleVT();
2760  MVT DestVT = DestEVT.getSimpleVT();
2761  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2762  if (ResultReg == 0) return false;
2763  updateValueMap(I, ResultReg);
2764  return true;
2765}
2766
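// Lower shl/lshr/ashr in ARM mode. A constant shift amount is folded into a
// MOVsi shifter operand; a variable amount uses MOVsr. Thumb2 shifts are left
// to the other selectors.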
2767bool ARMFastISel::SelectShift(const Instruction *I,
2768                              ARM_AM::ShiftOpc ShiftTy) {
  // Thumb2 mode is handled by the target-independent selector
  // or by SelectionDAG ISel.
2771  if (isThumb2)
2772    return false;
2773
2774  // Only handle i32 now.
2775  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
2776  if (DestVT != MVT::i32)
2777    return false;
2778
2779  unsigned Opc = ARM::MOVsr;
2780  unsigned ShiftImm;
2781  Value *Src2Value = I->getOperand(1);
2782  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
2783    ShiftImm = CI->getZExtValue();
2784
2785    // Fall back to selection DAG isel if the shift amount
2786    // is zero or greater than the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
2788      return false;
2789
2790    Opc = ARM::MOVsi;
2791  }
2792
2793  Value *Src1Value = I->getOperand(0);
2794  unsigned Reg1 = getRegForValue(Src1Value);
2795  if (Reg1 == 0) return false;
2796
2797  unsigned Reg2 = 0;
2798  if (Opc == ARM::MOVsr) {
2799    Reg2 = getRegForValue(Src2Value);
2800    if (Reg2 == 0) return false;
2801  }
2802
2803  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;
2805
2806  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2807                                    TII.get(Opc), ResultReg)
2808                            .addReg(Reg1);
2809
2810  if (Opc == ARM::MOVsi)
2811    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
2812  else if (Opc == ARM::MOVsr) {
2813    MIB.addReg(Reg2);
2814    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
2815  }
2816
2817  AddOptionalDefs(MIB);
2818  updateValueMap(I, ResultReg);
2819  return true;
2820}
2821
2822// TODO: SoftFP support.
2823bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
2824  switch (I->getOpcode()) {
2825    case Instruction::Load:
2826      return SelectLoad(I);
2827    case Instruction::Store:
2828      return SelectStore(I);
2829    case Instruction::Br:
2830      return SelectBranch(I);
2831    case Instruction::IndirectBr:
2832      return SelectIndirectBr(I);
2833    case Instruction::ICmp:
2834    case Instruction::FCmp:
2835      return SelectCmp(I);
2836    case Instruction::FPExt:
2837      return SelectFPExt(I);
2838    case Instruction::FPTrunc:
2839      return SelectFPTrunc(I);
2840    case Instruction::SIToFP:
2841      return SelectIToFP(I, /*isSigned*/ true);
2842    case Instruction::UIToFP:
2843      return SelectIToFP(I, /*isSigned*/ false);
2844    case Instruction::FPToSI:
2845      return SelectFPToI(I, /*isSigned*/ true);
2846    case Instruction::FPToUI:
2847      return SelectFPToI(I, /*isSigned*/ false);
2848    case Instruction::Add:
2849      return SelectBinaryIntOp(I, ISD::ADD);
2850    case Instruction::Or:
2851      return SelectBinaryIntOp(I, ISD::OR);
2852    case Instruction::Sub:
2853      return SelectBinaryIntOp(I, ISD::SUB);
2854    case Instruction::FAdd:
2855      return SelectBinaryFPOp(I, ISD::FADD);
2856    case Instruction::FSub:
2857      return SelectBinaryFPOp(I, ISD::FSUB);
2858    case Instruction::FMul:
2859      return SelectBinaryFPOp(I, ISD::FMUL);
2860    case Instruction::SDiv:
2861      return SelectDiv(I, /*isSigned*/ true);
2862    case Instruction::UDiv:
2863      return SelectDiv(I, /*isSigned*/ false);
2864    case Instruction::SRem:
2865      return SelectRem(I, /*isSigned*/ true);
2866    case Instruction::URem:
2867      return SelectRem(I, /*isSigned*/ false);
2868    case Instruction::Call:
2869      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2870        return SelectIntrinsicCall(*II);
2871      return SelectCall(I);
2872    case Instruction::Select:
2873      return SelectSelect(I);
2874    case Instruction::Ret:
2875      return SelectRet(I);
2876    case Instruction::Trunc:
2877      return SelectTrunc(I);
2878    case Instruction::ZExt:
2879    case Instruction::SExt:
2880      return SelectIntExt(I);
2881    case Instruction::Shl:
2882      return SelectShift(I, ARM_AM::lsl);
2883    case Instruction::LShr:
2884      return SelectShift(I, ARM_AM::lsr);
2885    case Instruction::AShr:
2886      return SelectShift(I, ARM_AM::asr);
2887    default: break;
2888  }
2889  return false;
2890}
2891
2892// This table describes sign- and zero-extend instructions which can be
2893// folded into a preceding load. All of these extends have an immediate
2894// (sometimes a mask and sometimes a shift) that's applied after
2895// extension.
2896static const struct FoldableLoadExtendsStruct {
2897  uint16_t Opc[2];  // ARM, Thumb.
2898  uint8_t ExpectedImm;
2899  uint8_t isZExt     : 1;
2900  uint8_t ExpectedVT : 7;
2901} FoldableLoadExtends[] = {
2902  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
2903  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
2904  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
2905  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
2906  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
2907};

/// The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction.  If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;
  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  // Initialized to avoid a maybe-uninitialized warning; only read when Found.
  bool isZExt = false;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  Register ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                     unsigned Align, MVT VT) {
  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
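  // The PC read by the pc-relative add/load below is the instruction address
  // plus 8 in ARM state and plus 4 in Thumb state, so the constant-pool entry
  // must be biased by that amount.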
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  unsigned ConstAlign =
      MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, 4);

  Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
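  // The ARM-mode LDRcp form takes an extra offset immediate after the
  // constant-pool index; t2LDRpci does not, so only add the zero offset here.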
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
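  // In ARM mode a GOT_PREL access can use PICLDR, which adds pc and loads the
  // GOT entry in one pseudo; otherwise PICADD just adds pc. Thumb always uses
  // tPICADD and, for GOT_PREL, performs the extra load separately below.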
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

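  // In Thumb mode the tPICADD above only produced the address of the GOT
  // slot, so an additional load is needed to fetch the global's address.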
  if (UseGOT_PREL && Subtarget->isThumb()) {
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}

bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::Swift:
    break;
  }

  // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0-r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };
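  // After the checks above, every argument is a simple scalar in one of the
  // first four slots, so its argument number indexes GPRArgRegs directly.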

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }

  return true;
}

namespace llvm {

  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm