//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  cl::desc("Check fp vmla / vmls hazard at isel time"),
  cl::init(true));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  const char *getPassName() const override {
    return "ARM Instruction Selection";
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  SDNode *Select(SDNode *N) override;


  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

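  /// SelectCMOVPred - Convert the condition-code constant operand of an
  /// ARMISD::CMOV into the (predicate, CPSR register) operand pair expected
  /// by the predicated conditional-move patterns.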
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
                             unsigned Scale);
  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2ShifterOperandReg(SDValue N,
                                 SDValue &BaseReg, SDValue &Opc);
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
  /// ARM.
  SDNode *SelectARMIndexedLoad(SDNode *N);
  SDNode *SelectT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
                          bool isUpdating, unsigned NumVecs,
                          const uint16_t *DOpcodes, const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.  (Q registers are not supported.)
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
  SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  SDNode *SelectABSOp(SDNode *N);

  SDNode *SelectReadRegister(SDNode *N);
  SDNode *SelectWriteRegister(SDNode *N);

  SDNode *SelectInlineAsm(SDNode *N);

  SDNode *SelectConcatVector(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                        bool is64BitVector);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// \brief Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

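  // hasV6T2Ops() was checked above, so any Thumb subtarget here is Thumb2.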
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free; e.g. on
      // Swift, a left shift by 1 or 2 as a shifter operand is free, but other
      // amounts are not. For example:
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
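    // After the shift the mask must be a string of contiguous low bits, i.e.
    // of the form 2^k - 1; (x & (x + 1)) == 0 checks exactly that (e.g.
    // 255 & 256 == 0, but 250 & 251 != 0).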
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!CheckVMLxHazard)
    return true;

  if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
      !Subtarget->isCortexA9() && !Subtarget->isSwift())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
514  if (RHS) return false;
515
516  ShReg = N.getOperand(1);
517  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
518    return false;
519  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
520                                  SDLoc(N), MVT::i32);
521  return true;
522}
523
524
525bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
526                                          SDValue &Base,
527                                          SDValue &OffImm) {
528  // Match simple R + imm12 operands.
529
530  // Base only.
531  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
532      !CurDAG->isBaseWithConstantOffset(N)) {
533    if (N.getOpcode() == ISD::FrameIndex) {
534      // Match frame index.
535      int FI = cast<FrameIndexSDNode>(N)->getIndex();
536      Base = CurDAG->getTargetFrameIndex(
537          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
538      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
539      return true;
540    }
541
542    if (N.getOpcode() == ARMISD::Wrapper &&
543        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
544      Base = N.getOperand(0);
545    } else
546      Base = N;
547    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
548    return true;
549  }
550
551  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
552    int RHSC = (int)RHS->getSExtValue();
553    if (N.getOpcode() == ISD::SUB)
554      RHSC = -RHSC;
555
556    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
557      Base   = N.getOperand(0);
558      if (Base.getOpcode() == ISD::FrameIndex) {
559        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
560        Base = CurDAG->getTargetFrameIndex(
561            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
562      }
563      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
564      return true;
565    }
566  }
567
568  // Base only.
569  Base = N;
570  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
571  return true;
572}
573
574
575
576bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
577                                      SDValue &Opc) {
578  if (N.getOpcode() == ISD::MUL &&
579      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
580    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
581      // X * [3,5,9] -> X + X * [2,4,8] etc.
582      int RHSC = (int)RHS->getZExtValue();
583      if (RHSC & 1) {
584        RHSC = RHSC & ~1;
585        ARM_AM::AddrOpc AddSub = ARM_AM::add;
586        if (RHSC < 0) {
587          AddSub = ARM_AM::sub;
588          RHSC = - RHSC;
589        }
590        if (isPowerOf2_32(RHSC)) {
591          unsigned ShAmt = Log2_32(RHSC);
592          Base = Offset = N.getOperand(0);
593          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
594                                                            ARM_AM::lsl),
595                                          SDLoc(N), MVT::i32);
596          return true;
597        }
598      }
599    }
600  }
601
602  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
603      // ISD::OR that is equivalent to an ISD::ADD.
604      !CurDAG->isBaseWithConstantOffset(N))
605    return false;
606
607  // Leave simple R +/- imm12 operands for LDRi12
608  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
609    int RHSC;
610    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
611                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
612      return false;
613  }
614
615  // Otherwise this is R +/- [possibly shifted] R.
616  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
617  ARM_AM::ShiftOpc ShOpcVal =
618    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
619  unsigned ShAmt = 0;
620
621  Base   = N.getOperand(0);
622  Offset = N.getOperand(1);
623
624  if (ShOpcVal != ARM_AM::no_shift) {
625    // Check to see if the RHS of the shift is a constant, if not, we can't fold
626    // it.
627    if (ConstantSDNode *Sh =
628           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
629      ShAmt = Sh->getZExtValue();
630      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
631        Offset = N.getOperand(1).getOperand(0);
632      else {
633        ShAmt = 0;
634        ShOpcVal = ARM_AM::no_shift;
635      }
636    } else {
637      ShOpcVal = ARM_AM::no_shift;
638    }
639  }
640
641  // Try matching (R shl C) + (R).
642  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
643      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
644        N.getOperand(0).hasOneUse())) {
645    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
646    if (ShOpcVal != ARM_AM::no_shift) {
647      // Check to see if the RHS of the shift is a constant, if not, we can't
648      // fold it.
649      if (ConstantSDNode *Sh =
650          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
651        ShAmt = Sh->getZExtValue();
652        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
653          Offset = N.getOperand(0).getOperand(0);
654          Base = N.getOperand(1);
655        } else {
656          ShAmt = 0;
657          ShOpcVal = ARM_AM::no_shift;
658        }
659      } else {
660        ShOpcVal = ARM_AM::no_shift;
661      }
662    }
663  }
664
665  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
666                                  SDLoc(N), MVT::i32);
667  return true;
668}
669
670
671//-----
672
673AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
674                                                     SDValue &Base,
675                                                     SDValue &Offset,
676                                                     SDValue &Opc) {
677  if (N.getOpcode() == ISD::MUL &&
678      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
679    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
680      // X * [3,5,9] -> X + X * [2,4,8] etc.
681      int RHSC = (int)RHS->getZExtValue();
682      if (RHSC & 1) {
683        RHSC = RHSC & ~1;
684        ARM_AM::AddrOpc AddSub = ARM_AM::add;
685        if (RHSC < 0) {
686          AddSub = ARM_AM::sub;
687          RHSC = - RHSC;
688        }
689        if (isPowerOf2_32(RHSC)) {
690          unsigned ShAmt = Log2_32(RHSC);
691          Base = Offset = N.getOperand(0);
692          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
693                                                            ARM_AM::lsl),
694                                          SDLoc(N), MVT::i32);
695          return AM2_SHOP;
696        }
697      }
698    }
699  }
700
701  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
702      // ISD::OR that is equivalent to an ADD.
703      !CurDAG->isBaseWithConstantOffset(N)) {
704    Base = N;
705    if (N.getOpcode() == ISD::FrameIndex) {
706      int FI = cast<FrameIndexSDNode>(N)->getIndex();
707      Base = CurDAG->getTargetFrameIndex(
708          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
709    } else if (N.getOpcode() == ARMISD::Wrapper &&
710               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
711      Base = N.getOperand(0);
712    }
713    Offset = CurDAG->getRegister(0, MVT::i32);
714    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
715                                                      ARM_AM::no_shift),
716                                    SDLoc(N), MVT::i32);
717    return AM2_BASE;
718  }
719
720  // Match simple R +/- imm12 operands.
721  if (N.getOpcode() != ISD::SUB) {
722    int RHSC;
723    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
724                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
725      Base = N.getOperand(0);
726      if (Base.getOpcode() == ISD::FrameIndex) {
727        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
728        Base = CurDAG->getTargetFrameIndex(
729            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
730      }
731      Offset = CurDAG->getRegister(0, MVT::i32);
732
733      ARM_AM::AddrOpc AddSub = ARM_AM::add;
734      if (RHSC < 0) {
735        AddSub = ARM_AM::sub;
736        RHSC = - RHSC;
737      }
738      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
739                                                        ARM_AM::no_shift),
740                                      SDLoc(N), MVT::i32);
741      return AM2_BASE;
742    }
743  }
744
745  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
746    // Compute R +/- (R << N) and reuse it.
747    Base = N;
748    Offset = CurDAG->getRegister(0, MVT::i32);
749    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
750                                                      ARM_AM::no_shift),
751                                    SDLoc(N), MVT::i32);
752    return AM2_BASE;
753  }
754
755  // Otherwise this is R +/- [possibly shifted] R.
756  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
757  ARM_AM::ShiftOpc ShOpcVal =
758    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
759  unsigned ShAmt = 0;
760
761  Base   = N.getOperand(0);
762  Offset = N.getOperand(1);
763
764  if (ShOpcVal != ARM_AM::no_shift) {
765    // Check to see if the RHS of the shift is a constant, if not, we can't fold
766    // it.
767    if (ConstantSDNode *Sh =
768           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
769      ShAmt = Sh->getZExtValue();
770      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
771        Offset = N.getOperand(1).getOperand(0);
772      else {
773        ShAmt = 0;
774        ShOpcVal = ARM_AM::no_shift;
775      }
776    } else {
777      ShOpcVal = ARM_AM::no_shift;
778    }
779  }
780
781  // Try matching (R shl C) + (R).
782  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
783      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
784        N.getOperand(0).hasOneUse())) {
785    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
786    if (ShOpcVal != ARM_AM::no_shift) {
787      // Check to see if the RHS of the shift is a constant, if not, we can't
788      // fold it.
789      if (ConstantSDNode *Sh =
790          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
791        ShAmt = Sh->getZExtValue();
792        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
793          Offset = N.getOperand(0).getOperand(0);
794          Base = N.getOperand(1);
795        } else {
796          ShAmt = 0;
797          ShOpcVal = ARM_AM::no_shift;
798        }
799      } else {
800        ShOpcVal = ARM_AM::no_shift;
801      }
802    }
803  }
804
805  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
806                                  SDLoc(N), MVT::i32);
807  return AM2_SHOP;
808}
809
810bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
811                                            SDValue &Offset, SDValue &Opc) {
812  unsigned Opcode = Op->getOpcode();
813  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
814    ? cast<LoadSDNode>(Op)->getAddressingMode()
815    : cast<StoreSDNode>(Op)->getAddressingMode();
816  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
817    ? ARM_AM::add : ARM_AM::sub;
818  int Val;
819  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
820    return false;
821
822  Offset = N;
823  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
824  unsigned ShAmt = 0;
825  if (ShOpcVal != ARM_AM::no_shift) {
826    // Check to see if the RHS of the shift is a constant, if not, we can't fold
827    // it.
828    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
829      ShAmt = Sh->getZExtValue();
830      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
831        Offset = N.getOperand(0);
832      else {
833        ShAmt = 0;
834        ShOpcVal = ARM_AM::no_shift;
835      }
836    } else {
837      ShOpcVal = ARM_AM::no_shift;
838    }
839  }
840
841  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
842                                  SDLoc(N), MVT::i32);
843  return true;
844}
845
846bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
847                                            SDValue &Offset, SDValue &Opc) {
848  unsigned Opcode = Op->getOpcode();
849  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
850    ? cast<LoadSDNode>(Op)->getAddressingMode()
851    : cast<StoreSDNode>(Op)->getAddressingMode();
852  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
853    ? ARM_AM::add : ARM_AM::sub;
854  int Val;
855  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
856    if (AddSub == ARM_AM::sub) Val *= -1;
857    Offset = CurDAG->getRegister(0, MVT::i32);
858    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
859    return true;
860  }
861
862  return false;
863}
864
865
866bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
867                                            SDValue &Offset, SDValue &Opc) {
868  unsigned Opcode = Op->getOpcode();
869  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
870    ? cast<LoadSDNode>(Op)->getAddressingMode()
871    : cast<StoreSDNode>(Op)->getAddressingMode();
872  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
873    ? ARM_AM::add : ARM_AM::sub;
874  int Val;
875  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
876    Offset = CurDAG->getRegister(0, MVT::i32);
877    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
878                                                      ARM_AM::no_shift),
879                                    SDLoc(Op), MVT::i32);
880    return true;
881  }
882
883  return false;
884}
885
886bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
887  Base = N;
888  return true;
889}
890
891bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
892                                      SDValue &Base, SDValue &Offset,
893                                      SDValue &Opc) {
894  if (N.getOpcode() == ISD::SUB) {
895    // X - C  is canonicalize to X + -C, no need to handle it here.
896    Base = N.getOperand(0);
897    Offset = N.getOperand(1);
898    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
899                                    MVT::i32);
900    return true;
901  }
902
903  if (!CurDAG->isBaseWithConstantOffset(N)) {
904    Base = N;
905    if (N.getOpcode() == ISD::FrameIndex) {
906      int FI = cast<FrameIndexSDNode>(N)->getIndex();
907      Base = CurDAG->getTargetFrameIndex(
908          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
909    }
910    Offset = CurDAG->getRegister(0, MVT::i32);
911    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
912                                    MVT::i32);
913    return true;
914  }
915
916  // If the RHS is +/- imm8, fold into addr mode.
917  int RHSC;
918  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
919                              -256 + 1, 256, RHSC)) { // 8 bits.
920    Base = N.getOperand(0);
921    if (Base.getOpcode() == ISD::FrameIndex) {
922      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
923      Base = CurDAG->getTargetFrameIndex(
924          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
925    }
926    Offset = CurDAG->getRegister(0, MVT::i32);
927
928    ARM_AM::AddrOpc AddSub = ARM_AM::add;
929    if (RHSC < 0) {
930      AddSub = ARM_AM::sub;
931      RHSC = -RHSC;
932    }
933    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
934                                    MVT::i32);
935    return true;
936  }
937
938  Base = N.getOperand(0);
939  Offset = N.getOperand(1);
940  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
941                                  MVT::i32);
942  return true;
943}
944
945bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
946                                            SDValue &Offset, SDValue &Opc) {
947  unsigned Opcode = Op->getOpcode();
948  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
949    ? cast<LoadSDNode>(Op)->getAddressingMode()
950    : cast<StoreSDNode>(Op)->getAddressingMode();
951  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
952    ? ARM_AM::add : ARM_AM::sub;
953  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
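  // A constant post-increment equal to the access size selects the
  // fixed-writeback form, signalled here by register 0; any other increment
  // stays in the offset register.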
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset){
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
                                       SDValue &Offset, unsigned Scale) {
  if (Scale == 4) {
    SDValue TmpBase, TmpOffImm;
    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
      return false;  // We want to select tLDRspi / tSTRspi instead.

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
      return false;  // We want to select tLDRpci instead.
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Thumb does not have [sp, r] address mode.
  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
  if ((LHSR && LHSR->getReg() == ARM::SP) ||
      (RHSR && RHSR->getReg() == ARM::SP))
    return false;

  // FIXME: Why do we explicitly check for a match here and then return false?
  // Presumably to allow something else to match, but shouldn't this be
  // documented?
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
    return false;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
                                          SDValue &Base,
                                          SDValue &Offset) {
  return SelectThumbAddrModeRI(N, Base, Offset, 1);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
                                          SDValue &Base,
                                          SDValue &Offset) {
  return SelectThumbAddrModeRI(N, Base, Offset, 2);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
                                          SDValue &Base,
                                          SDValue &Offset) {
  return SelectThumbAddrModeRI(N, Base, Offset, 4);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (Scale == 4) {
    SDValue TmpBase, TmpOffImm;
    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
      return false;  // We want to select tLDRspi / tSTRspi instead.

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
      return false;  // We want to select tLDRpci instead.
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
  if ((LHSR && LHSR->getReg() == ARM::SP) ||
      (RHSR && RHSR->getReg() == ARM::SP)) {
    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
    unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
    unsigned RHSC = RHS ? RHS->getZExtValue() : 0;

    // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
    if (LHSC != 0 || RHSC != 0) return false;

    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo *MFI = MF->getFrameInfo();
    if (MFI->getObjectAlignment(FI) < 4)
      MFI->setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo *MFI = MF->getFrameInfo();
        if (MFI->getObjectAlignment(FI) < 4)
          MFI->setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1259
1260
1261//===----------------------------------------------------------------------===//
1262//                        Thumb 2 Addressing Modes
1263//===----------------------------------------------------------------------===//
1264
1265
1266bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1267                                                SDValue &Opc) {
1268  if (DisableShifterOp)
1269    return false;
1270
1271  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1272
1273  // Don't match base register only case. That is matched to a separate
1274  // lower complexity pattern with explicit register operand.
1275  if (ShOpcVal == ARM_AM::no_shift) return false;
1276
1277  BaseReg = N.getOperand(0);
1278  unsigned ShImmVal = 0;
1279  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1280    ShImmVal = RHS->getZExtValue() & 31;
1281    Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
1282    return true;
1283  }
1284
1285  return false;
1286}
1287
1288bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1289                                            SDValue &Base, SDValue &OffImm) {
1290  // Match simple R + imm12 operands.
1291
1292  // Base only.
1293  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1294      !CurDAG->isBaseWithConstantOffset(N)) {
1295    if (N.getOpcode() == ISD::FrameIndex) {
1296      // Match frame index.
1297      int FI = cast<FrameIndexSDNode>(N)->getIndex();
1298      Base = CurDAG->getTargetFrameIndex(
1299          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1300      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1301      return true;
1302    }
1303
1304    if (N.getOpcode() == ARMISD::Wrapper &&
1305        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1306      Base = N.getOperand(0);
1307      if (Base.getOpcode() == ISD::TargetConstantPool)
1308        return false;  // We want to select t2LDRpci instead.
1309    } else
1310      Base = N;
1311    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1312    return true;
1313  }
1314
1315  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1316    if (SelectT2AddrModeImm8(N, Base, OffImm))
1317      // Let t2LDRi8 handle (R - imm8).
1318      return false;
1319
1320    int RHSC = (int)RHS->getZExtValue();
1321    if (N.getOpcode() == ISD::SUB)
1322      RHSC = -RHSC;
1323
1324    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1325      Base   = N.getOperand(0);
1326      if (Base.getOpcode() == ISD::FrameIndex) {
1327        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1328        Base = CurDAG->getTargetFrameIndex(
1329            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1330      }
1331      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1332      return true;
1333    }
1334  }
1335
1336  // Base only.
1337  Base = N;
1338  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1339  return true;
1340}
1341
1342bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1343                                           SDValue &Base, SDValue &OffImm) {
1344  // Match simple R - imm8 operands.
1345  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1346      !CurDAG->isBaseWithConstantOffset(N))
1347    return false;
1348
1349  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1350    int RHSC = (int)RHS->getSExtValue();
1351    if (N.getOpcode() == ISD::SUB)
1352      RHSC = -RHSC;
1353
1354    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1355      Base = N.getOperand(0);
1356      if (Base.getOpcode() == ISD::FrameIndex) {
1357        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1358        Base = CurDAG->getTargetFrameIndex(
1359            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1360      }
1361      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1362      return true;
1363    }
1364  }
1365
1366  return false;
1367}
1368
1369bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1370                                                 SDValue &OffImm){
1371  unsigned Opcode = Op->getOpcode();
1372  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1373    ? cast<LoadSDNode>(Op)->getAddressingMode()
1374    : cast<StoreSDNode>(Op)->getAddressingMode();
1375  int RHSC;
1376  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1377    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1378      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1379      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1380    return true;
1381  }
1382
1383  return false;
1384}
1385
1386bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1387                                            SDValue &Base,
1388                                            SDValue &OffReg, SDValue &ShImm) {
1389  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1390  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1391    return false;
1392
1393  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1394  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1395    int RHSC = (int)RHS->getZExtValue();
1396    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1397      return false;
1398    else if (RHSC < 0 && RHSC >= -255) // 8 bits
1399      return false;
1400  }
1401
1402  // Look for (R + R) or (R + (R << [1,2,3])).
1403  unsigned ShAmt = 0;
1404  Base   = N.getOperand(0);
1405  OffReg = N.getOperand(1);
1406
1407  // Swap if it is ((R << c) + R).
1408  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1409  if (ShOpcVal != ARM_AM::lsl) {
1410    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1411    if (ShOpcVal == ARM_AM::lsl)
1412      std::swap(Base, OffReg);
1413  }
1414
1415  if (ShOpcVal == ARM_AM::lsl) {
1416    // Check to see if the RHS of the shift is a constant; if not, we can't
1417    // fold it.
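    // Thumb-2 register-offset addressing only allows a left shift of 0-3, so
    // anything larger is left as a separate shift instruction.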
1418    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1419      ShAmt = Sh->getZExtValue();
1420      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1421        OffReg = OffReg.getOperand(0);
1422      else {
1423        ShAmt = 0;
1424      }
1425    }
1426  }
1427
1428  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1429
1430  return true;
1431}
1432
1433bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1434                                                SDValue &OffImm) {
1435  // This *must* succeed since it's used for the irreplaceable ldrex and strex
1436  // instructions.
1437  Base = N;
1438  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1439
1440  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1441    return true;
1442
1443  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1444  if (!RHS)
1445    return true;
1446
1447  uint32_t RHSC = (int)RHS->getZExtValue();
1448  if (RHSC > 1020 || RHSC % 4 != 0)
1449    return true;
1450
1451  Base = N.getOperand(0);
1452  if (Base.getOpcode() == ISD::FrameIndex) {
1453    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1454    Base = CurDAG->getTargetFrameIndex(
1455        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1456  }
1457
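  // The ldrex/strex offset is encoded in words, so pass the byte offset scaled
  // down by 4 (the check above already required a multiple of 4 up to 1020).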
1458  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1459  return true;
1460}
1461
1462//===--------------------------------------------------------------------===//
1463
1464/// getAL - Returns an ARMCC::AL immediate node.
1465static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
1466  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1467}
1468
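/// SelectARMIndexedLoad - Try to turn a pre- or post-indexed LoadSDNode into
/// one of the ARM-mode LDR/LDRB/LDRH/LDRSB/LDRSH writeback instructions;
/// returns null if no addressing-mode match is found.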
1469SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1470  LoadSDNode *LD = cast<LoadSDNode>(N);
1471  ISD::MemIndexedMode AM = LD->getAddressingMode();
1472  if (AM == ISD::UNINDEXED)
1473    return nullptr;
1474
1475  EVT LoadedVT = LD->getMemoryVT();
1476  SDValue Offset, AMOpc;
1477  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1478  unsigned Opcode = 0;
1479  bool Match = false;
1480  if (LoadedVT == MVT::i32 && isPre &&
1481      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1482    Opcode = ARM::LDR_PRE_IMM;
1483    Match = true;
1484  } else if (LoadedVT == MVT::i32 && !isPre &&
1485      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1486    Opcode = ARM::LDR_POST_IMM;
1487    Match = true;
1488  } else if (LoadedVT == MVT::i32 &&
1489      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1490    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1491    Match = true;
1492
1493  } else if (LoadedVT == MVT::i16 &&
1494             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1495    Match = true;
1496    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1497      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1498      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1499  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1500    if (LD->getExtensionType() == ISD::SEXTLOAD) {
1501      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1502        Match = true;
1503        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1504      }
1505    } else {
1506      if (isPre &&
1507          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1508        Match = true;
1509        Opcode = ARM::LDRB_PRE_IMM;
1510      } else if (!isPre &&
1511                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1512        Match = true;
1513        Opcode = ARM::LDRB_POST_IMM;
1514      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1515        Match = true;
1516        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1517      }
1518    }
1519  }
1520
1521  if (Match) {
1522    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1523      SDValue Chain = LD->getChain();
1524      SDValue Base = LD->getBasePtr();
1525      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1526                       CurDAG->getRegister(0, MVT::i32), Chain };
1527      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1528                                    MVT::i32, MVT::Other, Ops);
1529    } else {
1530      SDValue Chain = LD->getChain();
1531      SDValue Base = LD->getBasePtr();
1532      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1533                       CurDAG->getRegister(0, MVT::i32), Chain };
1534      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1535                                    MVT::i32, MVT::Other, Ops);
1536    }
1537  }
1538
1539  return nullptr;
1540}
1541
1542SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1543  LoadSDNode *LD = cast<LoadSDNode>(N);
1544  ISD::MemIndexedMode AM = LD->getAddressingMode();
1545  if (AM == ISD::UNINDEXED)
1546    return nullptr;
1547
1548  EVT LoadedVT = LD->getMemoryVT();
1549  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1550  SDValue Offset;
1551  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1552  unsigned Opcode = 0;
1553  bool Match = false;
1554  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1555    switch (LoadedVT.getSimpleVT().SimpleTy) {
1556    case MVT::i32:
1557      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1558      break;
1559    case MVT::i16:
1560      if (isSExtLd)
1561        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1562      else
1563        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1564      break;
1565    case MVT::i8:
1566    case MVT::i1:
1567      if (isSExtLd)
1568        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1569      else
1570        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1571      break;
1572    default:
1573      return nullptr;
1574    }
1575    Match = true;
1576  }
1577
1578  if (Match) {
1579    SDValue Chain = LD->getChain();
1580    SDValue Base = LD->getBasePtr();
1581    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1582                     CurDAG->getRegister(0, MVT::i32), Chain };
1583    return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1584                                  MVT::Other, Ops);
1585  }
1586
1587  return nullptr;
1588}
1589
1590/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
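/// REG_SEQUENCE operands are the register-class ID followed by alternating
/// (value, sub-register index) pairs; all of the create*Node helpers below use
/// this layout.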
1591SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1592  SDLoc dl(V0.getNode());
1593  SDValue RegClass =
1594    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1595  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1596  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1597  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1599}
1600
1601/// \brief Form a D register from a pair of S registers.
1602SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603  SDLoc dl(V0.getNode());
1604  SDValue RegClass =
1605    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1606  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1607  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1608  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1610}
1611
1612/// \brief Form a quad register from a pair of D registers.
1613SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1614  SDLoc dl(V0.getNode());
1615  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1616                                               MVT::i32);
1617  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1618  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1619  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1620  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1621}
1622
1623/// \brief Form 4 consecutive D registers from a pair of Q registers.
1624SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1625  SDLoc dl(V0.getNode());
1626  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1627                                               MVT::i32);
1628  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1629  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1630  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1631  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1632}
1633
1634/// \brief Form 4 consecutive S registers.
1635SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1636                                   SDValue V2, SDValue V3) {
1637  SDLoc dl(V0.getNode());
1638  SDValue RegClass =
1639    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1640  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1641  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1642  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1643  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1644  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1645                                    V2, SubReg2, V3, SubReg3 };
1646  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1647}
1648
1649/// \brief Form 4 consecutive D registers.
1650SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1651                                   SDValue V2, SDValue V3) {
1652  SDLoc dl(V0.getNode());
1653  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1654                                               MVT::i32);
1655  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1656  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1657  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1658  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1659  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1660                                    V2, SubReg2, V3, SubReg3 };
1661  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1662}
1663
1664/// \brief Form 4 consecutive Q registers.
1665SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1666                                   SDValue V2, SDValue V3) {
1667  SDLoc dl(V0.getNode());
1668  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1669                                               MVT::i32);
1670  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1671  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1672  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1673  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1674  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1675                                    V2, SubReg2, V3, SubReg3 };
1676  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1677}
1678
1679/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1680/// of a NEON VLD or VST instruction.  The supported values depend on the
1681/// number of registers being loaded.
1682SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1683                                       unsigned NumVecs, bool is64BitVector) {
1684  unsigned NumRegs = NumVecs;
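  // A 128-bit vector occupies two D registers.  Quad-register VLD3/VLD4 are
  // split into two instructions (see SelectVLD), so only the one- and
  // two-vector cases double the register count here.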
1685  if (!is64BitVector && NumVecs < 3)
1686    NumRegs *= 2;
1687
1688  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1689  if (Alignment >= 32 && NumRegs == 4)
1690    Alignment = 32;
1691  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1692    Alignment = 16;
1693  else if (Alignment >= 8)
1694    Alignment = 8;
1695  else
1696    Alignment = 0;
1697
1698  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1699}
1700
1701static bool isVLDfixed(unsigned Opc) {
1703  switch (Opc) {
1704  default: return false;
1705  case ARM::VLD1d8wb_fixed : return true;
1706  case ARM::VLD1d16wb_fixed : return true;
1707  case ARM::VLD1d64Qwb_fixed : return true;
1708  case ARM::VLD1d32wb_fixed : return true;
1709  case ARM::VLD1d64wb_fixed : return true;
1710  case ARM::VLD1d64TPseudoWB_fixed : return true;
1711  case ARM::VLD1d64QPseudoWB_fixed : return true;
1712  case ARM::VLD1q8wb_fixed : return true;
1713  case ARM::VLD1q16wb_fixed : return true;
1714  case ARM::VLD1q32wb_fixed : return true;
1715  case ARM::VLD1q64wb_fixed : return true;
1716  case ARM::VLD2d8wb_fixed : return true;
1717  case ARM::VLD2d16wb_fixed : return true;
1718  case ARM::VLD2d32wb_fixed : return true;
1719  case ARM::VLD2q8PseudoWB_fixed : return true;
1720  case ARM::VLD2q16PseudoWB_fixed : return true;
1721  case ARM::VLD2q32PseudoWB_fixed : return true;
1722  case ARM::VLD2DUPd8wb_fixed : return true;
1723  case ARM::VLD2DUPd16wb_fixed : return true;
1724  case ARM::VLD2DUPd32wb_fixed : return true;
1725  }
1726}
1727
1728static bool isVSTfixed(unsigned Opc) {
1730  switch (Opc) {
1731  default: return false;
1732  case ARM::VST1d8wb_fixed : return true;
1733  case ARM::VST1d16wb_fixed : return true;
1734  case ARM::VST1d32wb_fixed : return true;
1735  case ARM::VST1d64wb_fixed : return true;
1736  case ARM::VST1q8wb_fixed : return true;
1737  case ARM::VST1q16wb_fixed : return true;
1738  case ARM::VST1q32wb_fixed : return true;
1739  case ARM::VST1q64wb_fixed : return true;
1740  case ARM::VST1d64TPseudoWB_fixed : return true;
1741  case ARM::VST1d64QPseudoWB_fixed : return true;
1742  case ARM::VST2d8wb_fixed : return true;
1743  case ARM::VST2d16wb_fixed : return true;
1744  case ARM::VST2d32wb_fixed : return true;
1745  case ARM::VST2q8PseudoWB_fixed : return true;
1746  case ARM::VST2q16PseudoWB_fixed : return true;
1747  case ARM::VST2q32PseudoWB_fixed : return true;
1748  }
1749}
1750
1751// Get the register stride update opcode of a VLD/VST instruction that
1752// is otherwise equivalent to the given fixed stride updating instruction.
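// The "_fixed" forms write the base register back by the size of the memory
// access, while the "_register" forms take an explicit increment register.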
1753static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1754  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1755    && "Incorrect fixed stride updating instruction.");
1756  switch (Opc) {
1757  default: break;
1758  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1759  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1760  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1761  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1762  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1763  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1764  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1765  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1766  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1767  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1768  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1769  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1770
1771  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1772  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1773  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1774  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1775  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1776  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1777  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1778  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1779  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1780  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1781
1782  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1783  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1784  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1785  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1786  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1787  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1788
1789  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1790  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1791  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1792  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1793  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1794  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1795
1796  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1797  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1798  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1799  }
1800  return Opc; // If not one we handle, return it unchanged.
1801}
1802
1803SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1804                                   const uint16_t *DOpcodes,
1805                                   const uint16_t *QOpcodes0,
1806                                   const uint16_t *QOpcodes1) {
1807  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1808  SDLoc dl(N);
1809
1810  SDValue MemAddr, Align;
1811  unsigned AddrOpIdx = isUpdating ? 1 : 2;
1812  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1813    return nullptr;
1814
1815  SDValue Chain = N->getOperand(0);
1816  EVT VT = N->getValueType(0);
1817  bool is64BitVector = VT.is64BitVector();
1818  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1819
1820  unsigned OpcodeIndex;
1821  switch (VT.getSimpleVT().SimpleTy) {
1822  default: llvm_unreachable("unhandled vld type");
1823    // Double-register operations:
1824  case MVT::v8i8:  OpcodeIndex = 0; break;
1825  case MVT::v4i16: OpcodeIndex = 1; break;
1826  case MVT::v2f32:
1827  case MVT::v2i32: OpcodeIndex = 2; break;
1828  case MVT::v1i64: OpcodeIndex = 3; break;
1829    // Quad-register operations:
1830  case MVT::v16i8: OpcodeIndex = 0; break;
1831  case MVT::v8i16: OpcodeIndex = 1; break;
1832  case MVT::v4f32:
1833  case MVT::v4i32: OpcodeIndex = 2; break;
1834  case MVT::v2f64:
1835  case MVT::v2i64: OpcodeIndex = 3;
1836    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1837    break;
1838  }
1839
1840  EVT ResTy;
1841  if (NumVecs == 1)
1842    ResTy = VT;
1843  else {
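    // The result is modeled as a single wide vector covering every loaded
    // register; vld3 rounds up to 4 because its pseudo instructions define a
    // full QQ/QQQQ super-register (the last sub-register is simply unused).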
1844    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1845    if (!is64BitVector)
1846      ResTyElts *= 2;
1847    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1848  }
1849  std::vector<EVT> ResTys;
1850  ResTys.push_back(ResTy);
1851  if (isUpdating)
1852    ResTys.push_back(MVT::i32);
1853  ResTys.push_back(MVT::Other);
1854
1855  SDValue Pred = getAL(CurDAG, dl);
1856  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1857  SDNode *VLd;
1858  SmallVector<SDValue, 7> Ops;
1859
1860  // Double registers and VLD1/VLD2 quad registers are directly supported.
1861  if (is64BitVector || NumVecs <= 2) {
1862    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1863                    QOpcodes0[OpcodeIndex]);
1864    Ops.push_back(MemAddr);
1865    Ops.push_back(Align);
1866    if (isUpdating) {
1867      SDValue Inc = N->getOperand(AddrOpIdx + 1);
1868      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1869      // case entirely when the rest are updated to that form, too.
1870      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1871        Opc = getVLDSTRegisterUpdateOpcode(Opc);
1872      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1873      // check for that explicitly too. Horribly hacky, but temporary.
1874      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1875          !isa<ConstantSDNode>(Inc.getNode()))
1876        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1877    }
1878    Ops.push_back(Pred);
1879    Ops.push_back(Reg0);
1880    Ops.push_back(Chain);
1881    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1882
1883  } else {
1884    // Otherwise, quad registers are loaded with two separate instructions,
1885    // where one loads the even registers and the other loads the odd registers.
1886    EVT AddrTy = MemAddr.getValueType();
1887
1888    // Load the even subregs.  This is always an updating load, so that it
1889    // provides the address to the second load for the odd subregs.
1890    SDValue ImplDef =
1891      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1892    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1893    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1894                                          ResTy, AddrTy, MVT::Other, OpsA);
1895    Chain = SDValue(VLdA, 2);
1896
1897    // Load the odd subregs.
1898    Ops.push_back(SDValue(VLdA, 1));
1899    Ops.push_back(Align);
1900    if (isUpdating) {
1901      SDValue Inc = N->getOperand(AddrOpIdx + 1);
1902      assert(isa<ConstantSDNode>(Inc.getNode()) &&
1903             "only constant post-increment update allowed for VLD3/4");
1904      (void)Inc;
1905      Ops.push_back(Reg0);
1906    }
1907    Ops.push_back(SDValue(VLdA, 0));
1908    Ops.push_back(Pred);
1909    Ops.push_back(Reg0);
1910    Ops.push_back(Chain);
1911    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1912  }
1913
1914  // Transfer memoperands.
1915  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1916  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1917  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1918
1919  if (NumVecs == 1)
1920    return VLd;
1921
1922  // Extract out the subregisters.
1923  SDValue SuperReg = SDValue(VLd, 0);
1924  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1925         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1926  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1927  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1928    ReplaceUses(SDValue(N, Vec),
1929                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1930  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1931  if (isUpdating)
1932    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1933  return nullptr;
1934}
1935
1936SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1937                                   const uint16_t *DOpcodes,
1938                                   const uint16_t *QOpcodes0,
1939                                   const uint16_t *QOpcodes1) {
1940  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1941  SDLoc dl(N);
1942
1943  SDValue MemAddr, Align;
1944  unsigned AddrOpIdx = isUpdating ? 1 : 2;
1945  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1946  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1947    return nullptr;
1948
1949  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1950  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1951
1952  SDValue Chain = N->getOperand(0);
1953  EVT VT = N->getOperand(Vec0Idx).getValueType();
1954  bool is64BitVector = VT.is64BitVector();
1955  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1956
1957  unsigned OpcodeIndex;
1958  switch (VT.getSimpleVT().SimpleTy) {
1959  default: llvm_unreachable("unhandled vst type");
1960    // Double-register operations:
1961  case MVT::v8i8:  OpcodeIndex = 0; break;
1962  case MVT::v4i16: OpcodeIndex = 1; break;
1963  case MVT::v2f32:
1964  case MVT::v2i32: OpcodeIndex = 2; break;
1965  case MVT::v1i64: OpcodeIndex = 3; break;
1966    // Quad-register operations:
1967  case MVT::v16i8: OpcodeIndex = 0; break;
1968  case MVT::v8i16: OpcodeIndex = 1; break;
1969  case MVT::v4f32:
1970  case MVT::v4i32: OpcodeIndex = 2; break;
1971  case MVT::v2f64:
1972  case MVT::v2i64: OpcodeIndex = 3;
1973    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1974    break;
1975  }
1976
1977  std::vector<EVT> ResTys;
1978  if (isUpdating)
1979    ResTys.push_back(MVT::i32);
1980  ResTys.push_back(MVT::Other);
1981
1982  SDValue Pred = getAL(CurDAG, dl);
1983  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1984  SmallVector<SDValue, 7> Ops;
1985
1986  // Double registers and VST1/VST2 quad registers are directly supported.
1987  if (is64BitVector || NumVecs <= 2) {
1988    SDValue SrcReg;
1989    if (NumVecs == 1) {
1990      SrcReg = N->getOperand(Vec0Idx);
1991    } else if (is64BitVector) {
1992      // Form a REG_SEQUENCE to force register allocation.
1993      SDValue V0 = N->getOperand(Vec0Idx + 0);
1994      SDValue V1 = N->getOperand(Vec0Idx + 1);
1995      if (NumVecs == 2)
1996        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1997      else {
1998        SDValue V2 = N->getOperand(Vec0Idx + 2);
1999        // If it's a vst3, form a quad D-register and leave the last part as
2000        // an undef.
2001        SDValue V3 = (NumVecs == 3)
2002          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2003          : N->getOperand(Vec0Idx + 3);
2004        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2005      }
2006    } else {
2007      // Form a QQ register.
2008      SDValue Q0 = N->getOperand(Vec0Idx);
2009      SDValue Q1 = N->getOperand(Vec0Idx + 1);
2010      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2011    }
2012
2013    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2014                    QOpcodes0[OpcodeIndex]);
2015    Ops.push_back(MemAddr);
2016    Ops.push_back(Align);
2017    if (isUpdating) {
2018      SDValue Inc = N->getOperand(AddrOpIdx + 1);
2019      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2020      // case entirely when the rest are updated to that form, too.
2021      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2022        Opc = getVLDSTRegisterUpdateOpcode(Opc);
2023      // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
2024      // check for that explicitly too. Horribly hacky, but temporary.
2025      if (!isa<ConstantSDNode>(Inc.getNode()))
2026        Ops.push_back(Inc);
2027      else if (NumVecs > 2 && !isVSTfixed(Opc))
2028        Ops.push_back(Reg0);
2029    }
2030    Ops.push_back(SrcReg);
2031    Ops.push_back(Pred);
2032    Ops.push_back(Reg0);
2033    Ops.push_back(Chain);
2034    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2035
2036    // Transfer memoperands.
2037    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2038
2039    return VSt;
2040  }
2041
2042  // Otherwise, quad registers are stored with two separate instructions,
2043  // where one stores the even registers and the other stores the odd registers.
2044
2045  // Form the QQQQ REG_SEQUENCE.
2046  SDValue V0 = N->getOperand(Vec0Idx + 0);
2047  SDValue V1 = N->getOperand(Vec0Idx + 1);
2048  SDValue V2 = N->getOperand(Vec0Idx + 2);
2049  SDValue V3 = (NumVecs == 3)
2050    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2051    : N->getOperand(Vec0Idx + 3);
2052  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2053
2054  // Store the even D registers.  This is always an updating store, so that it
2055  // provides the address to the second store for the odd subregs.
2056  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2057  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2058                                        MemAddr.getValueType(),
2059                                        MVT::Other, OpsA);
2060  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2061  Chain = SDValue(VStA, 1);
2062
2063  // Store the odd D registers.
2064  Ops.push_back(SDValue(VStA, 0));
2065  Ops.push_back(Align);
2066  if (isUpdating) {
2067    SDValue Inc = N->getOperand(AddrOpIdx + 1);
2068    assert(isa<ConstantSDNode>(Inc.getNode()) &&
2069           "only constant post-increment update allowed for VST3/4");
2070    (void)Inc;
2071    Ops.push_back(Reg0);
2072  }
2073  Ops.push_back(RegSeq);
2074  Ops.push_back(Pred);
2075  Ops.push_back(Reg0);
2076  Ops.push_back(Chain);
2077  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2078                                        Ops);
2079  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2080  return VStB;
2081}
2082
2083SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
2084                                         bool isUpdating, unsigned NumVecs,
2085                                         const uint16_t *DOpcodes,
2086                                         const uint16_t *QOpcodes) {
2087  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2088  SDLoc dl(N);
2089
2090  SDValue MemAddr, Align;
2091  unsigned AddrOpIdx = isUpdating ? 1 : 2;
2092  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2093  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2094    return nullptr;
2095
2096  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2097  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2098
2099  SDValue Chain = N->getOperand(0);
2100  unsigned Lane =
2101    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2102  EVT VT = N->getOperand(Vec0Idx).getValueType();
2103  bool is64BitVector = VT.is64BitVector();
2104
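  // Clamp the alignment hint to what the lane instruction can encode: no more
  // than the bytes actually accessed, rounded down to a power of two, with 0
  // meaning "no alignment specified".  The vld3/vst3 lane forms take no hint.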
2105  unsigned Alignment = 0;
2106  if (NumVecs != 3) {
2107    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2108    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2109    if (Alignment > NumBytes)
2110      Alignment = NumBytes;
2111    if (Alignment < 8 && Alignment < NumBytes)
2112      Alignment = 0;
2113    // Alignment must be a power of two; make sure of that.
2114    Alignment = (Alignment & -Alignment);
2115    if (Alignment == 1)
2116      Alignment = 0;
2117  }
2118  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2119
2120  unsigned OpcodeIndex;
2121  switch (VT.getSimpleVT().SimpleTy) {
2122  default: llvm_unreachable("unhandled vld/vst lane type");
2123    // Double-register operations:
2124  case MVT::v8i8:  OpcodeIndex = 0; break;
2125  case MVT::v4i16: OpcodeIndex = 1; break;
2126  case MVT::v2f32:
2127  case MVT::v2i32: OpcodeIndex = 2; break;
2128    // Quad-register operations:
2129  case MVT::v8i16: OpcodeIndex = 0; break;
2130  case MVT::v4f32:
2131  case MVT::v4i32: OpcodeIndex = 1; break;
2132  }
2133
2134  std::vector<EVT> ResTys;
2135  if (IsLoad) {
2136    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2137    if (!is64BitVector)
2138      ResTyElts *= 2;
2139    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2140                                      MVT::i64, ResTyElts));
2141  }
2142  if (isUpdating)
2143    ResTys.push_back(MVT::i32);
2144  ResTys.push_back(MVT::Other);
2145
2146  SDValue Pred = getAL(CurDAG, dl);
2147  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2148
2149  SmallVector<SDValue, 8> Ops;
2150  Ops.push_back(MemAddr);
2151  Ops.push_back(Align);
2152  if (isUpdating) {
2153    SDValue Inc = N->getOperand(AddrOpIdx + 1);
2154    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2155  }
2156
2157  SDValue SuperReg;
2158  SDValue V0 = N->getOperand(Vec0Idx + 0);
2159  SDValue V1 = N->getOperand(Vec0Idx + 1);
2160  if (NumVecs == 2) {
2161    if (is64BitVector)
2162      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2163    else
2164      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2165  } else {
2166    SDValue V2 = N->getOperand(Vec0Idx + 2);
2167    SDValue V3 = (NumVecs == 3)
2168      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2169      : N->getOperand(Vec0Idx + 3);
2170    if (is64BitVector)
2171      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2172    else
2173      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2174  }
2175  Ops.push_back(SuperReg);
2176  Ops.push_back(getI32Imm(Lane, dl));
2177  Ops.push_back(Pred);
2178  Ops.push_back(Reg0);
2179  Ops.push_back(Chain);
2180
2181  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2182                                  QOpcodes[OpcodeIndex]);
2183  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2184  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2185  if (!IsLoad)
2186    return VLdLn;
2187
2188  // Extract the subregisters.
2189  SuperReg = SDValue(VLdLn, 0);
2190  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
2191         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
2192  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2193  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2194    ReplaceUses(SDValue(N, Vec),
2195                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2196  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2197  if (isUpdating)
2198    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2199  return nullptr;
2200}
2201
2202SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
2203                                      unsigned NumVecs,
2204                                      const uint16_t *Opcodes) {
2205  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2206  SDLoc dl(N);
2207
2208  SDValue MemAddr, Align;
2209  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2210    return nullptr;
2211
2212  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2213  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2214
2215  SDValue Chain = N->getOperand(0);
2216  EVT VT = N->getValueType(0);
2217
2218  unsigned Alignment = 0;
2219  if (NumVecs != 3) {
2220    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2221    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2222    if (Alignment > NumBytes)
2223      Alignment = NumBytes;
2224    if (Alignment < 8 && Alignment < NumBytes)
2225      Alignment = 0;
2226    // Alignment must be a power of two; make sure of that.
2227    Alignment = (Alignment & -Alignment);
2228    if (Alignment == 1)
2229      Alignment = 0;
2230  }
2231  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2232
2233  unsigned OpcodeIndex;
2234  switch (VT.getSimpleVT().SimpleTy) {
2235  default: llvm_unreachable("unhandled vld-dup type");
2236  case MVT::v8i8:  OpcodeIndex = 0; break;
2237  case MVT::v4i16: OpcodeIndex = 1; break;
2238  case MVT::v2f32:
2239  case MVT::v2i32: OpcodeIndex = 2; break;
2240  }
2241
2242  SDValue Pred = getAL(CurDAG, dl);
2243  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2244  SDValue SuperReg;
2245  unsigned Opc = Opcodes[OpcodeIndex];
2246  SmallVector<SDValue, 6> Ops;
2247  Ops.push_back(MemAddr);
2248  Ops.push_back(Align);
2249  if (isUpdating) {
2250    // Fixed-stride update instructions don't have an explicit writeback
2251    // operand; it's implicit in the opcode itself.
2252    SDValue Inc = N->getOperand(2);
2253    if (!isa<ConstantSDNode>(Inc.getNode()))
2254      Ops.push_back(Inc);
2255    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2256    else if (NumVecs > 2)
2257      Ops.push_back(Reg0);
2258  }
2259  Ops.push_back(Pred);
2260  Ops.push_back(Reg0);
2261  Ops.push_back(Chain);
2262
2263  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2264  std::vector<EVT> ResTys;
2265  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2266  if (isUpdating)
2267    ResTys.push_back(MVT::i32);
2268  ResTys.push_back(MVT::Other);
2269  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2270  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2271  SuperReg = SDValue(VLdDup, 0);
2272
2273  // Extract the subregisters.
2274  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2275  unsigned SubIdx = ARM::dsub_0;
2276  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2277    ReplaceUses(SDValue(N, Vec),
2278                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2279  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2280  if (isUpdating)
2281    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2282  return nullptr;
2283}
2284
2285SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2286                                    unsigned Opc) {
2287  assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2288  SDLoc dl(N);
2289  EVT VT = N->getValueType(0);
2290  unsigned FirstTblReg = IsExt ? 2 : 1;
2291
2292  // Form a REG_SEQUENCE to force register allocation.
2293  SDValue RegSeq;
2294  SDValue V0 = N->getOperand(FirstTblReg + 0);
2295  SDValue V1 = N->getOperand(FirstTblReg + 1);
2296  if (NumVecs == 2)
2297    RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2298  else {
2299    SDValue V2 = N->getOperand(FirstTblReg + 2);
2300    // If it's a vtbl3, form a quad D-register and leave the last part as
2301    // an undef.
2302    SDValue V3 = (NumVecs == 3)
2303      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2304      : N->getOperand(FirstTblReg + 3);
2305    RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2306  }
2307
2308  SmallVector<SDValue, 6> Ops;
2309  if (IsExt)
2310    Ops.push_back(N->getOperand(1));
2311  Ops.push_back(RegSeq);
2312  Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2313  Ops.push_back(getAL(CurDAG, dl)); // predicate
2314  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2315  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2316}
2317
2318SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2319                                                     bool isSigned) {
2320  if (!Subtarget->hasV6T2Ops())
2321    return nullptr;
2322
2323  unsigned Opc = isSigned
2324    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2325    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2326  SDLoc dl(N);
2327
2328  // For unsigned extracts, check for a shift right and mask
2329  unsigned And_imm = 0;
2330  if (N->getOpcode() == ISD::AND) {
2331    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2332
2333      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2334      if (And_imm & (And_imm + 1))
2335        return nullptr;
2336
2337      unsigned Srl_imm = 0;
2338      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2339                                Srl_imm)) {
2340        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2341
2342        // Note: The width operand is encoded as width-1.
2343        unsigned Width = countTrailingOnes(And_imm) - 1;
2344        unsigned LSB = Srl_imm;
2345
2346        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2347
2348        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2349          // It's cheaper to use a right shift to extract the top bits.
2350          if (Subtarget->isThumb()) {
2351            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2352            SDValue Ops[] = { N->getOperand(0).getOperand(0),
2353                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2354                              getAL(CurDAG, dl), Reg0, Reg0 };
2355            return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2356          }
2357
2358          // ARM models shift instructions as MOVsi with shifter operand.
2359          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2360          SDValue ShOpc =
2361            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2362                                      MVT::i32);
2363          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2364                            getAL(CurDAG, dl), Reg0, Reg0 };
2365          return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2366        }
2367
2368        SDValue Ops[] = { N->getOperand(0).getOperand(0),
2369                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2370                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
2371                          getAL(CurDAG, dl), Reg0 };
2372        return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2373      }
2374    }
2375    return nullptr;
2376  }
2377
2378  // Otherwise, we're looking for a shift of a shift
2379  unsigned Shl_imm = 0;
2380  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2381    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2382    unsigned Srl_imm = 0;
2383    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2384      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
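      // (x << Shl_imm) >> Srl_imm extracts the (32 - Srl_imm)-bit field that
      // starts at bit (Srl_imm - Shl_imm) of x; a negative LSB means the
      // result is not a plain bitfield extract of x, so give up.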
2385      // Note: The width operand is encoded as width-1.
2386      unsigned Width = 32 - Srl_imm - 1;
2387      int LSB = Srl_imm - Shl_imm;
2388      if (LSB < 0)
2389        return nullptr;
2390      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2391      SDValue Ops[] = { N->getOperand(0).getOperand(0),
2392                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2393                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
2394                        getAL(CurDAG, dl), Reg0 };
2395      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2396    }
2397  }
2398
2399  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2400    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2401    unsigned LSB = 0;
2402    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2403        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2404      return nullptr;
2405
2406    if (LSB + Width > 32)
2407      return nullptr;
2408
2409    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2410    SDValue Ops[] = { N->getOperand(0).getOperand(0),
2411                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2412                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2413                      getAL(CurDAG, dl), Reg0 };
2414    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2415  }
2416
2417  return nullptr;
2418}
2419
2420/// Target-specific DAG combining for ISD::XOR.
2421/// Target-independent combining lowers SELECT_CC nodes of the form
2422/// select_cc setg[ge] X,  0,  X, -X
2423/// select_cc setgt    X, -1,  X, -X
2424/// select_cc setl[te] X,  0, -X,  X
2425/// select_cc setlt    X,  1, -X,  X
2426/// which represent Integer ABS into:
2427/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2428/// ARM instruction selection detects the latter and matches it to
2429/// ARM::ABS or ARM::t2ABS machine node.
2430SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2431  SDValue XORSrc0 = N->getOperand(0);
2432  SDValue XORSrc1 = N->getOperand(1);
2433  EVT VT = N->getValueType(0);
2434
2435  if (Subtarget->isThumb1Only())
2436    return nullptr;
2437
2438  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2439    return nullptr;
2440
2441  SDValue ADDSrc0 = XORSrc0.getOperand(0);
2442  SDValue ADDSrc1 = XORSrc0.getOperand(1);
2443  SDValue SRASrc0 = XORSrc1.getOperand(0);
2444  SDValue SRASrc1 = XORSrc1.getOperand(1);
2445  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2446  EVT XType = SRASrc0.getValueType();
2447  unsigned Size = XType.getSizeInBits() - 1;
2448
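  // Match only if the xor really computes abs: the add and the xor share the
  // same sra operand, and the shift amount is the sign-bit index.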
2449  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2450      XType.isInteger() && SRAConstant != nullptr &&
2451      Size == SRAConstant->getZExtValue()) {
2452    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2453    return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2454  }
2455
2456  return nullptr;
2457}
2458
2459SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2460  // The only time a CONCAT_VECTORS operation can have legal types is when
2461  // two 64-bit vectors are concatenated to a 128-bit vector.
2462  EVT VT = N->getValueType(0);
2463  if (!VT.is128BitVector() || N->getNumOperands() != 2)
2464    llvm_unreachable("unexpected CONCAT_VECTORS");
2465  return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2466}
2467
2468SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2469  SDLoc dl(N);
2470
2471  if (N->isMachineOpcode()) {
2472    N->setNodeId(-1);
2473    return nullptr;   // Already selected.
2474  }
2475
2476  switch (N->getOpcode()) {
2477  default: break;
2478  case ISD::WRITE_REGISTER: {
2479    SDNode *ResNode = SelectWriteRegister(N);
2480    if (ResNode)
2481      return ResNode;
2482    break;
2483  }
2484  case ISD::READ_REGISTER: {
2485    SDNode *ResNode = SelectReadRegister(N);
2486    if (ResNode)
2487      return ResNode;
2488    break;
2489  }
2490  case ISD::INLINEASM: {
2491    SDNode *ResNode = SelectInlineAsm(N);
2492    if (ResNode)
2493      return ResNode;
2494    break;
2495  }
2496  case ISD::XOR: {
2497    // Select special operations if the XOR node forms an integer ABS pattern.
2498    SDNode *ResNode = SelectABSOp(N);
2499    if (ResNode)
2500      return ResNode;
2501    // Other cases are autogenerated.
2502    break;
2503  }
2504  case ISD::Constant: {
2505    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2506    bool UseCP = true;
2507    if (Subtarget->useMovt(*MF))
2508      // Thumb2-aware targets have the MOVT instruction, so all immediates can
2509      // be done with MOV + MOVT, at worst.
2510      UseCP = false;
2511    else {
2512      if (Subtarget->isThumb()) {
2513        UseCP = (Val > 255 &&                                  // MOV
2514                 ~Val > 255 &&                                 // MOV + MVN
2515                 !ARM_AM::isThumbImmShiftedVal(Val) &&         // MOV + LSL
2516                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2517      } else
2518        UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&             // MOV
2519                 ARM_AM::getSOImmVal(~Val) == -1 &&            // MVN
2520                 !ARM_AM::isSOImmTwoPartVal(Val) &&            // two instrs.
2521                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2522    }
2523
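    // Materialize the constant with a PC-relative load from the constant pool
    // (tLDRpci for Thumb, LDRcp for ARM).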
2524    if (UseCP) {
2525      SDValue CPIdx = CurDAG->getTargetConstantPool(
2526          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2527          TLI->getPointerTy(CurDAG->getDataLayout()));
2528
2529      SDNode *ResNode;
2530      if (Subtarget->isThumb()) {
2531        SDValue Pred = getAL(CurDAG, dl);
2532        SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2533        SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2534        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2535                                         Ops);
2536      } else {
2537        SDValue Ops[] = {
2538          CPIdx,
2539          CurDAG->getTargetConstant(0, dl, MVT::i32),
2540          getAL(CurDAG, dl),
2541          CurDAG->getRegister(0, MVT::i32),
2542          CurDAG->getEntryNode()
2543        };
2544        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2545                                         Ops);
2546      }
2547      ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2548      return nullptr;
2549    }
2550
2551    // Other cases are autogenerated.
2552    break;
2553  }
2554  case ISD::FrameIndex: {
2555    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2556    int FI = cast<FrameIndexSDNode>(N)->getIndex();
2557    SDValue TFI = CurDAG->getTargetFrameIndex(
2558        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2559    if (Subtarget->isThumb1Only()) {
2560      // Set the alignment of the frame object to 4, to avoid having to generate
2561      // more than one ADD
2562      MachineFrameInfo *MFI = MF->getFrameInfo();
2563      if (MFI->getObjectAlignment(FI) < 4)
2564        MFI->setObjectAlignment(FI, 4);
2565      return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2566                                  CurDAG->getTargetConstant(0, dl, MVT::i32));
2567    } else {
2568      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2569                      ARM::t2ADDri : ARM::ADDri);
2570      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2571                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2572                        CurDAG->getRegister(0, MVT::i32) };
2573      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2574    }
2575  }
2576  case ISD::SRL:
2577    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2578      return I;
2579    break;
2580  case ISD::SIGN_EXTEND_INREG:
2581  case ISD::SRA:
2582    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2583      return I;
2584    break;
2585  case ISD::MUL:
2586    if (Subtarget->isThumb1Only())
2587      break;
2588    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2589      unsigned RHSV = C->getZExtValue();
2590      if (!RHSV) break;
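      // Strength-reduce multiplies by 2^n+1 to add(x, x << n) and multiplies
      // by 2^n-1 to rsb(x, x << n), using the shifter operand.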
2591      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2592        unsigned ShImm = Log2_32(RHSV-1);
2593        if (ShImm >= 32)
2594          break;
2595        SDValue V = N->getOperand(0);
2596        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2597        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2598        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2599        if (Subtarget->isThumb()) {
2600          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2601          return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2602        } else {
2603          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2604                            Reg0 };
2605          return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2606        }
2607      }
2608      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2609        unsigned ShImm = Log2_32(RHSV+1);
2610        if (ShImm >= 32)
2611          break;
2612        SDValue V = N->getOperand(0);
2613        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2614        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2615        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2616        if (Subtarget->isThumb()) {
2617          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2618          return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2619        } else {
2620          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2621                            Reg0 };
2622          return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2623        }
2624      }
2625    }
2626    break;
2627  case ISD::AND: {
2628    // Check for unsigned bitfield extract
2629    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2630      return I;
2631
2632    // (and (or x, c2), c1) where the top 16 bits of c1 and c2 match, the lower
2633    // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0. That is,
2634    // the top 16 bits are entirely contributed by c2 and the lower 16 bits by
2635    // x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2636    // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)".
2637    EVT VT = N->getValueType(0);
2638    if (VT != MVT::i32)
2639      break;
2640    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2641      ? ARM::t2MOVTi16
2642      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2643    if (!Opc)
2644      break;
2645    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2646    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2647    if (!N1C)
2648      break;
2649    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2650      SDValue N2 = N0.getOperand(1);
2651      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2652      if (!N2C)
2653        break;
2654      unsigned N1CVal = N1C->getZExtValue();
2655      unsigned N2CVal = N2C->getZExtValue();
2656      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2657          (N1CVal & 0xffffU) == 0xffffU &&
2658          (N2CVal & 0xffffU) == 0x0U) {
2659        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2660                                                  dl, MVT::i32);
2661        SDValue Ops[] = { N0.getOperand(0), Imm16,
2662                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2663        return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2664      }
2665    }
2666    break;
2667  }
2668  case ARMISD::VMOVRRD:
2669    return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2670                                  N->getOperand(0), getAL(CurDAG, dl),
2671                                  CurDAG->getRegister(0, MVT::i32));
2672  case ISD::UMUL_LOHI: {
2673    if (Subtarget->isThumb1Only())
2674      break;
2675    if (Subtarget->isThumb()) {
2676      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2677                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2678      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2679    } else {
2680      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2681                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2682                        CurDAG->getRegister(0, MVT::i32) };
2683      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2684                                    ARM::UMULL : ARM::UMULLv5,
2685                                    dl, MVT::i32, MVT::i32, Ops);
2686    }
2687  }
2688  case ISD::SMUL_LOHI: {
2689    if (Subtarget->isThumb1Only())
2690      break;
2691    if (Subtarget->isThumb()) {
2692      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2693                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2694      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2695    } else {
2696      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2697                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2698                        CurDAG->getRegister(0, MVT::i32) };
2699      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2700                                    ARM::SMULL : ARM::SMULLv5,
2701                                    dl, MVT::i32, MVT::i32, Ops);
2702    }
2703  }
2704  case ARMISD::UMLAL: {
2705    if (Subtarget->isThumb()) {
2706      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2707                        N->getOperand(3), getAL(CurDAG, dl),
2708                        CurDAG->getRegister(0, MVT::i32)};
2709      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2710    } else {
2711      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2712                        N->getOperand(3), getAL(CurDAG, dl),
2713                        CurDAG->getRegister(0, MVT::i32),
2714                        CurDAG->getRegister(0, MVT::i32) };
2715      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2716                                      ARM::UMLAL : ARM::UMLALv5,
2717                                      dl, MVT::i32, MVT::i32, Ops);
2718    }
2719  }
2720  case ARMISD::SMLAL: {
2721    if (Subtarget->isThumb()) {
2722      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2723                        N->getOperand(3), getAL(CurDAG, dl),
2724                        CurDAG->getRegister(0, MVT::i32)};
2725      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2726    } else {
2727      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2728                        N->getOperand(3), getAL(CurDAG, dl),
2729                        CurDAG->getRegister(0, MVT::i32),
2730                        CurDAG->getRegister(0, MVT::i32) };
2731      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2732                                      ARM::SMLAL : ARM::SMLALv5,
2733                                      dl, MVT::i32, MVT::i32, Ops);
2734    }
2735  }
2736  case ISD::LOAD: {
2737    SDNode *ResNode = nullptr;
2738    if (Subtarget->isThumb() && Subtarget->hasThumb2())
2739      ResNode = SelectT2IndexedLoad(N);
2740    else
2741      ResNode = SelectARMIndexedLoad(N);
2742    if (ResNode)
2743      return ResNode;
2744    // Other cases are autogenerated.
2745    break;
2746  }
2747  case ARMISD::BRCOND: {
2748    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2749    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2750    // Pattern complexity = 6  cost = 1  size = 0
2751
2752    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2753    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2754    // Pattern complexity = 6  cost = 1  size = 0
2755
2756    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2757    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2758    // Pattern complexity = 6  cost = 1  size = 0
2759
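    // Operands: chain, destination basic block, ARM condition code, CPSR
    // register, and incoming glue.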
2760    unsigned Opc = Subtarget->isThumb() ?
2761      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2762    SDValue Chain = N->getOperand(0);
2763    SDValue N1 = N->getOperand(1);
2764    SDValue N2 = N->getOperand(2);
2765    SDValue N3 = N->getOperand(3);
2766    SDValue InFlag = N->getOperand(4);
2767    assert(N1.getOpcode() == ISD::BasicBlock);
2768    assert(N2.getOpcode() == ISD::Constant);
2769    assert(N3.getOpcode() == ISD::Register);
2770
2771    SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2772                               cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2773                               MVT::i32);
2774    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2775    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2776                                             MVT::Glue, Ops);
2777    Chain = SDValue(ResNode, 0);
2778    if (N->getNumValues() == 2) {
2779      InFlag = SDValue(ResNode, 1);
2780      ReplaceUses(SDValue(N, 1), InFlag);
2781    }
2782    ReplaceUses(SDValue(N, 0),
2783                SDValue(Chain.getNode(), Chain.getResNo()));
2784    return nullptr;
2785  }
2786  case ARMISD::VZIP: {
2787    unsigned Opc = 0;
2788    EVT VT = N->getValueType(0);
2789    switch (VT.getSimpleVT().SimpleTy) {
2790    default: return nullptr;
2791    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2792    case MVT::v4i16: Opc = ARM::VZIPd16; break;
2793    case MVT::v2f32:
2794    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2795    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2796    case MVT::v16i8: Opc = ARM::VZIPq8; break;
2797    case MVT::v8i16: Opc = ARM::VZIPq16; break;
2798    case MVT::v4f32:
2799    case MVT::v4i32: Opc = ARM::VZIPq32; break;
2800    }
2801    SDValue Pred = getAL(CurDAG, dl);
2802    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2803    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2804    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2805  }
2806  case ARMISD::VUZP: {
2807    unsigned Opc = 0;
2808    EVT VT = N->getValueType(0);
2809    switch (VT.getSimpleVT().SimpleTy) {
2810    default: return nullptr;
2811    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2812    case MVT::v4i16: Opc = ARM::VUZPd16; break;
2813    case MVT::v2f32:
2814    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2815    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2816    case MVT::v16i8: Opc = ARM::VUZPq8; break;
2817    case MVT::v8i16: Opc = ARM::VUZPq16; break;
2818    case MVT::v4f32:
2819    case MVT::v4i32: Opc = ARM::VUZPq32; break;
2820    }
2821    SDValue Pred = getAL(CurDAG, dl);
2822    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2823    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2824    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2825  }
2826  case ARMISD::VTRN: {
2827    unsigned Opc = 0;
2828    EVT VT = N->getValueType(0);
2829    switch (VT.getSimpleVT().SimpleTy) {
2830    default: return nullptr;
2831    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2832    case MVT::v4i16: Opc = ARM::VTRNd16; break;
2833    case MVT::v2f32:
2834    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2835    case MVT::v16i8: Opc = ARM::VTRNq8; break;
2836    case MVT::v8i16: Opc = ARM::VTRNq16; break;
2837    case MVT::v4f32:
2838    case MVT::v4i32: Opc = ARM::VTRNq32; break;
2839    }
2840    SDValue Pred = getAL(CurDAG, dl);
2841    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2842    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2843    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2844  }
2845  case ARMISD::BUILD_VECTOR: {
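    // f64 elements are paired into a Q register; f32 elements are packed into
    // a D register (two elements) or a Q register (four elements) via
    // REG_SEQUENCE nodes.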
2846    EVT VecVT = N->getValueType(0);
2847    EVT EltVT = VecVT.getVectorElementType();
2848    unsigned NumElts = VecVT.getVectorNumElements();
2849    if (EltVT == MVT::f64) {
2850      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2851      return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2852    }
2853    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2854    if (NumElts == 2)
2855      return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2856    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2857    return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2858                     N->getOperand(2), N->getOperand(3));
2859  }
2860
2861  case ARMISD::VLD2DUP: {
2862    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2863                                        ARM::VLD2DUPd32 };
2864    return SelectVLDDup(N, false, 2, Opcodes);
2865  }
2866
2867  case ARMISD::VLD3DUP: {
2868    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2869                                        ARM::VLD3DUPd16Pseudo,
2870                                        ARM::VLD3DUPd32Pseudo };
2871    return SelectVLDDup(N, false, 3, Opcodes);
2872  }
2873
2874  case ARMISD::VLD4DUP: {
2875    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2876                                        ARM::VLD4DUPd16Pseudo,
2877                                        ARM::VLD4DUPd32Pseudo };
2878    return SelectVLDDup(N, false, 4, Opcodes);
2879  }
2880
2881  case ARMISD::VLD2DUP_UPD: {
2882    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2883                                        ARM::VLD2DUPd16wb_fixed,
2884                                        ARM::VLD2DUPd32wb_fixed };
2885    return SelectVLDDup(N, true, 2, Opcodes);
2886  }
2887
2888  case ARMISD::VLD3DUP_UPD: {
2889    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2890                                        ARM::VLD3DUPd16Pseudo_UPD,
2891                                        ARM::VLD3DUPd32Pseudo_UPD };
2892    return SelectVLDDup(N, true, 3, Opcodes);
2893  }
2894
2895  case ARMISD::VLD4DUP_UPD: {
2896    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2897                                        ARM::VLD4DUPd16Pseudo_UPD,
2898                                        ARM::VLD4DUPd32Pseudo_UPD };
2899    return SelectVLDDup(N, true, 4, Opcodes);
2900  }
2901
2902  case ARMISD::VLD1_UPD: {
2903    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2904                                         ARM::VLD1d16wb_fixed,
2905                                         ARM::VLD1d32wb_fixed,
2906                                         ARM::VLD1d64wb_fixed };
2907    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2908                                         ARM::VLD1q16wb_fixed,
2909                                         ARM::VLD1q32wb_fixed,
2910                                         ARM::VLD1q64wb_fixed };
2911    return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2912  }
2913
2914  case ARMISD::VLD2_UPD: {
2915    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2916                                         ARM::VLD2d16wb_fixed,
2917                                         ARM::VLD2d32wb_fixed,
2918                                         ARM::VLD1q64wb_fixed};
2919    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2920                                         ARM::VLD2q16PseudoWB_fixed,
2921                                         ARM::VLD2q32PseudoWB_fixed };
2922    return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2923  }
2924
2925  case ARMISD::VLD3_UPD: {
2926    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2927                                         ARM::VLD3d16Pseudo_UPD,
2928                                         ARM::VLD3d32Pseudo_UPD,
2929                                         ARM::VLD1d64TPseudoWB_fixed};
2930    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2931                                          ARM::VLD3q16Pseudo_UPD,
2932                                          ARM::VLD3q32Pseudo_UPD };
2933    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2934                                          ARM::VLD3q16oddPseudo_UPD,
2935                                          ARM::VLD3q32oddPseudo_UPD };
2936    return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2937  }
2938
2939  case ARMISD::VLD4_UPD: {
2940    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2941                                         ARM::VLD4d16Pseudo_UPD,
2942                                         ARM::VLD4d32Pseudo_UPD,
2943                                         ARM::VLD1d64QPseudoWB_fixed};
2944    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2945                                          ARM::VLD4q16Pseudo_UPD,
2946                                          ARM::VLD4q32Pseudo_UPD };
2947    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2948                                          ARM::VLD4q16oddPseudo_UPD,
2949                                          ARM::VLD4q32oddPseudo_UPD };
2950    return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2951  }
2952
2953  case ARMISD::VLD2LN_UPD: {
2954    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2955                                         ARM::VLD2LNd16Pseudo_UPD,
2956                                         ARM::VLD2LNd32Pseudo_UPD };
2957    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2958                                         ARM::VLD2LNq32Pseudo_UPD };
2959    return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2960  }
2961
2962  case ARMISD::VLD3LN_UPD: {
2963    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2964                                         ARM::VLD3LNd16Pseudo_UPD,
2965                                         ARM::VLD3LNd32Pseudo_UPD };
2966    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2967                                         ARM::VLD3LNq32Pseudo_UPD };
2968    return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2969  }
2970
2971  case ARMISD::VLD4LN_UPD: {
2972    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2973                                         ARM::VLD4LNd16Pseudo_UPD,
2974                                         ARM::VLD4LNd32Pseudo_UPD };
2975    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2976                                         ARM::VLD4LNq32Pseudo_UPD };
2977    return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2978  }
2979
2980  case ARMISD::VST1_UPD: {
2981    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2982                                         ARM::VST1d16wb_fixed,
2983                                         ARM::VST1d32wb_fixed,
2984                                         ARM::VST1d64wb_fixed };
2985    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2986                                         ARM::VST1q16wb_fixed,
2987                                         ARM::VST1q32wb_fixed,
2988                                         ARM::VST1q64wb_fixed };
2989    return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2990  }
2991
2992  case ARMISD::VST2_UPD: {
2993    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2994                                         ARM::VST2d16wb_fixed,
2995                                         ARM::VST2d32wb_fixed,
2996                                         ARM::VST1q64wb_fixed};
2997    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2998                                         ARM::VST2q16PseudoWB_fixed,
2999                                         ARM::VST2q32PseudoWB_fixed };
3000    return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3001  }
3002
3003  case ARMISD::VST3_UPD: {
3004    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3005                                         ARM::VST3d16Pseudo_UPD,
3006                                         ARM::VST3d32Pseudo_UPD,
3007                                         ARM::VST1d64TPseudoWB_fixed};
3008    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3009                                          ARM::VST3q16Pseudo_UPD,
3010                                          ARM::VST3q32Pseudo_UPD };
3011    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3012                                          ARM::VST3q16oddPseudo_UPD,
3013                                          ARM::VST3q32oddPseudo_UPD };
3014    return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3015  }
3016
3017  case ARMISD::VST4_UPD: {
3018    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3019                                         ARM::VST4d16Pseudo_UPD,
3020                                         ARM::VST4d32Pseudo_UPD,
3021                                         ARM::VST1d64QPseudoWB_fixed};
3022    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3023                                          ARM::VST4q16Pseudo_UPD,
3024                                          ARM::VST4q32Pseudo_UPD };
3025    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3026                                          ARM::VST4q16oddPseudo_UPD,
3027                                          ARM::VST4q32oddPseudo_UPD };
3028    return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3029  }
3030
3031  case ARMISD::VST2LN_UPD: {
3032    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3033                                         ARM::VST2LNd16Pseudo_UPD,
3034                                         ARM::VST2LNd32Pseudo_UPD };
3035    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3036                                         ARM::VST2LNq32Pseudo_UPD };
3037    return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3038  }
3039
3040  case ARMISD::VST3LN_UPD: {
3041    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3042                                         ARM::VST3LNd16Pseudo_UPD,
3043                                         ARM::VST3LNd32Pseudo_UPD };
3044    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3045                                         ARM::VST3LNq32Pseudo_UPD };
3046    return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3047  }
3048
3049  case ARMISD::VST4LN_UPD: {
3050    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3051                                         ARM::VST4LNd16Pseudo_UPD,
3052                                         ARM::VST4LNd32Pseudo_UPD };
3053    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3054                                         ARM::VST4LNq32Pseudo_UPD };
3055    return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3056  }
3057
3058  case ISD::INTRINSIC_VOID:
3059  case ISD::INTRINSIC_W_CHAIN: {
3060    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3061    switch (IntNo) {
3062    default:
3063      break;
3064
3065    case Intrinsic::arm_ldaexd:
3066    case Intrinsic::arm_ldrexd: {
3067      SDLoc dl(N);
3068      SDValue Chain = N->getOperand(0);
3069      SDValue MemAddr = N->getOperand(2);
3070      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3071
3072      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3073      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3074                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3075
3076      // arm_ldrexd returns an i64 value in {i32, i32}
3077      std::vector<EVT> ResTys;
3078      if (isThumb) {
3079        ResTys.push_back(MVT::i32);
3080        ResTys.push_back(MVT::i32);
3081      } else
3082        ResTys.push_back(MVT::Untyped);
3083      ResTys.push_back(MVT::Other);
3084
3085      // Place arguments in the right order.
3086      SmallVector<SDValue, 7> Ops;
3087      Ops.push_back(MemAddr);
3088      Ops.push_back(getAL(CurDAG, dl));
3089      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3090      Ops.push_back(Chain);
3091      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3092      // Transfer memoperands.
3093      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3094      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3095      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3096
3097      // Remap uses.
3098      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3099      if (!SDValue(N, 0).use_empty()) {
3100        SDValue Result;
3101        if (isThumb)
3102          Result = SDValue(Ld, 0);
3103        else {
3104          SDValue SubRegIdx =
3105            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3106          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3107              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3108          Result = SDValue(ResNode,0);
3109        }
3110        ReplaceUses(SDValue(N, 0), Result);
3111      }
3112      if (!SDValue(N, 1).use_empty()) {
3113        SDValue Result;
3114        if (isThumb)
3115          Result = SDValue(Ld, 1);
3116        else {
3117          SDValue SubRegIdx =
3118            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3119          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3120              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3121          Result = SDValue(ResNode,0);
3122        }
3123        ReplaceUses(SDValue(N, 1), Result);
3124      }
3125      ReplaceUses(SDValue(N, 2), OutChain);
3126      return nullptr;
3127    }
3128    case Intrinsic::arm_stlexd:
3129    case Intrinsic::arm_strexd: {
3130      SDLoc dl(N);
3131      SDValue Chain = N->getOperand(0);
3132      SDValue Val0 = N->getOperand(2);
3133      SDValue Val1 = N->getOperand(3);
3134      SDValue MemAddr = N->getOperand(4);
3135
3136      // Store exclusive double returns an i32 value which is the return
3137      // status of the issued store.
3138      const EVT ResTys[] = {MVT::i32, MVT::Other};
3139
3140      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3141      // Place arguments in the right order.
3142      SmallVector<SDValue, 7> Ops;
3143      if (isThumb) {
3144        Ops.push_back(Val0);
3145        Ops.push_back(Val1);
3146      } else
3147        // arm_strexd uses GPRPair.
3148        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3149      Ops.push_back(MemAddr);
3150      Ops.push_back(getAL(CurDAG, dl));
3151      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3152      Ops.push_back(Chain);
3153
3154      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3155      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3156                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3157
3158      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3159      // Transfer memoperands.
3160      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3161      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3162      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3163
3164      return St;
3165    }
3166
3167    case Intrinsic::arm_neon_vld1: {
3168      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3169                                           ARM::VLD1d32, ARM::VLD1d64 };
3170      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3171                                           ARM::VLD1q32, ARM::VLD1q64};
3172      return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3173    }
3174
3175    case Intrinsic::arm_neon_vld2: {
3176      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3177                                           ARM::VLD2d32, ARM::VLD1q64 };
3178      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3179                                           ARM::VLD2q32Pseudo };
3180      return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3181    }
3182
3183    case Intrinsic::arm_neon_vld3: {
3184      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3185                                           ARM::VLD3d16Pseudo,
3186                                           ARM::VLD3d32Pseudo,
3187                                           ARM::VLD1d64TPseudo };
3188      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3189                                            ARM::VLD3q16Pseudo_UPD,
3190                                            ARM::VLD3q32Pseudo_UPD };
3191      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3192                                            ARM::VLD3q16oddPseudo,
3193                                            ARM::VLD3q32oddPseudo };
3194      return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3195    }
3196
3197    case Intrinsic::arm_neon_vld4: {
3198      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3199                                           ARM::VLD4d16Pseudo,
3200                                           ARM::VLD4d32Pseudo,
3201                                           ARM::VLD1d64QPseudo };
3202      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3203                                            ARM::VLD4q16Pseudo_UPD,
3204                                            ARM::VLD4q32Pseudo_UPD };
3205      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3206                                            ARM::VLD4q16oddPseudo,
3207                                            ARM::VLD4q32oddPseudo };
3208      return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3209    }
3210
3211    case Intrinsic::arm_neon_vld2lane: {
3212      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3213                                           ARM::VLD2LNd16Pseudo,
3214                                           ARM::VLD2LNd32Pseudo };
3215      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3216                                           ARM::VLD2LNq32Pseudo };
3217      return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3218    }
3219
3220    case Intrinsic::arm_neon_vld3lane: {
3221      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3222                                           ARM::VLD3LNd16Pseudo,
3223                                           ARM::VLD3LNd32Pseudo };
3224      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3225                                           ARM::VLD3LNq32Pseudo };
3226      return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3227    }
3228
3229    case Intrinsic::arm_neon_vld4lane: {
3230      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3231                                           ARM::VLD4LNd16Pseudo,
3232                                           ARM::VLD4LNd32Pseudo };
3233      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3234                                           ARM::VLD4LNq32Pseudo };
3235      return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3236    }
3237
3238    case Intrinsic::arm_neon_vst1: {
3239      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3240                                           ARM::VST1d32, ARM::VST1d64 };
3241      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3242                                           ARM::VST1q32, ARM::VST1q64 };
3243      return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3244    }
3245
3246    case Intrinsic::arm_neon_vst2: {
3247      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3248                                           ARM::VST2d32, ARM::VST1q64 };
3249      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3250                                           ARM::VST2q32Pseudo };
3251      return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3252    }
3253
3254    case Intrinsic::arm_neon_vst3: {
3255      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3256                                           ARM::VST3d16Pseudo,
3257                                           ARM::VST3d32Pseudo,
3258                                           ARM::VST1d64TPseudo };
3259      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3260                                            ARM::VST3q16Pseudo_UPD,
3261                                            ARM::VST3q32Pseudo_UPD };
3262      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3263                                            ARM::VST3q16oddPseudo,
3264                                            ARM::VST3q32oddPseudo };
3265      return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3266    }
3267
3268    case Intrinsic::arm_neon_vst4: {
3269      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3270                                           ARM::VST4d16Pseudo,
3271                                           ARM::VST4d32Pseudo,
3272                                           ARM::VST1d64QPseudo };
3273      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3274                                            ARM::VST4q16Pseudo_UPD,
3275                                            ARM::VST4q32Pseudo_UPD };
3276      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3277                                            ARM::VST4q16oddPseudo,
3278                                            ARM::VST4q32oddPseudo };
3279      return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3280    }
3281
3282    case Intrinsic::arm_neon_vst2lane: {
3283      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3284                                           ARM::VST2LNd16Pseudo,
3285                                           ARM::VST2LNd32Pseudo };
3286      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3287                                           ARM::VST2LNq32Pseudo };
3288      return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3289    }
3290
3291    case Intrinsic::arm_neon_vst3lane: {
3292      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3293                                           ARM::VST3LNd16Pseudo,
3294                                           ARM::VST3LNd32Pseudo };
3295      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3296                                           ARM::VST3LNq32Pseudo };
3297      return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3298    }
3299
3300    case Intrinsic::arm_neon_vst4lane: {
3301      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3302                                           ARM::VST4LNd16Pseudo,
3303                                           ARM::VST4LNd32Pseudo };
3304      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3305                                           ARM::VST4LNq32Pseudo };
3306      return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3307    }
3308    }
3309    break;
3310  }
3311
3312  case ISD::INTRINSIC_WO_CHAIN: {
3313    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3314    switch (IntNo) {
3315    default:
3316      break;
3317
3318    case Intrinsic::arm_neon_vtbl2:
3319      return SelectVTBL(N, false, 2, ARM::VTBL2);
3320    case Intrinsic::arm_neon_vtbl3:
3321      return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3322    case Intrinsic::arm_neon_vtbl4:
3323      return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3324
3325    case Intrinsic::arm_neon_vtbx2:
3326      return SelectVTBL(N, true, 2, ARM::VTBX2);
3327    case Intrinsic::arm_neon_vtbx3:
3328      return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3329    case Intrinsic::arm_neon_vtbx4:
3330      return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3331    }
3332    break;
3333  }
3334
3335  case ARMISD::VTBL1: {
3336    SDLoc dl(N);
3337    EVT VT = N->getValueType(0);
3338    SmallVector<SDValue, 6> Ops;
3339
3340    Ops.push_back(N->getOperand(0));
3341    Ops.push_back(N->getOperand(1));
3342    Ops.push_back(getAL(CurDAG, dl));                // Predicate
3343    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3344    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3345  }
3346  case ARMISD::VTBL2: {
3347    SDLoc dl(N);
3348    EVT VT = N->getValueType(0);
3349
3350    // Form a REG_SEQUENCE to force register allocation.
3351    SDValue V0 = N->getOperand(0);
3352    SDValue V1 = N->getOperand(1);
3353    SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3354
3355    SmallVector<SDValue, 6> Ops;
3356    Ops.push_back(RegSeq);
3357    Ops.push_back(N->getOperand(2));
3358    Ops.push_back(getAL(CurDAG, dl));                // Predicate
3359    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3360    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3361  }
3362
3363  case ISD::CONCAT_VECTORS:
3364    return SelectConcatVector(N);
3365  }
3366
3367  return SelectCode(N);
3368}
3369
3370// Inspect a register string of the form
3371// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
3372// cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer operands
3373// from the fields of the string, and add these operands to the provided
3374// vector.
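// For example, "cp15:0:c13:c0:3" yields the operand constants
// {15, 0, 13, 0, 3}, which SelectReadRegister/SelectWriteRegister lower to an
// MRC/MCR access.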
3375static void getIntOperandsFromRegisterString(StringRef RegString,
3376                                             SelectionDAG *CurDAG, SDLoc DL,
3377                                             std::vector<SDValue>& Ops) {
3378  SmallVector<StringRef, 5> Fields;
3379  RegString.split(Fields, ":");
3380
3381  if (Fields.size() > 1) {
3382    bool AllIntFields = true;
3383
3384    for (StringRef Field : Fields) {
3385      // Need to trim out leading 'cp' characters and get the integer field.
3386      unsigned IntField;
3387      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3388      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3389    }
3390
3391    assert(AllIntFields &&
3392            "Unexpected non-integer value in special register string.");
3393  }
3394}
3395
3396// Maps a Banked Register string to its mask value. The mask value returned is
3397// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3398// mask operand, which expresses which register is to be used, e.g. r8, and in
3399// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3400// was invalid.
3401static inline int getBankedRegisterMask(StringRef RegString) {
3402  return StringSwitch<int>(RegString.lower())
3403          .Case("r8_usr", 0x00)
3404          .Case("r9_usr", 0x01)
3405          .Case("r10_usr", 0x02)
3406          .Case("r11_usr", 0x03)
3407          .Case("r12_usr", 0x04)
3408          .Case("sp_usr", 0x05)
3409          .Case("lr_usr", 0x06)
3410          .Case("r8_fiq", 0x08)
3411          .Case("r9_fiq", 0x09)
3412          .Case("r10_fiq", 0x0a)
3413          .Case("r11_fiq", 0x0b)
3414          .Case("r12_fiq", 0x0c)
3415          .Case("sp_fiq", 0x0d)
3416          .Case("lr_fiq", 0x0e)
3417          .Case("lr_irq", 0x10)
3418          .Case("sp_irq", 0x11)
3419          .Case("lr_svc", 0x12)
3420          .Case("sp_svc", 0x13)
3421          .Case("lr_abt", 0x14)
3422          .Case("sp_abt", 0x15)
3423          .Case("lr_und", 0x16)
3424          .Case("sp_und", 0x17)
3425          .Case("lr_mon", 0x1c)
3426          .Case("sp_mon", 0x1d)
3427          .Case("elr_hyp", 0x1e)
3428          .Case("sp_hyp", 0x1f)
3429          .Case("spsr_fiq", 0x2e)
3430          .Case("spsr_irq", 0x30)
3431          .Case("spsr_svc", 0x32)
3432          .Case("spsr_abt", 0x34)
3433          .Case("spsr_und", 0x36)
3434          .Case("spsr_mon", 0x3c)
3435          .Case("spsr_hyp", 0x3e)
3436          .Default(-1);
3437}
3438
3439// Maps a MClass special register string to its value for use in the
3440// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3441// Returns -1 to signify that the string was invalid.
3442static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3443  return StringSwitch<int>(RegString.lower())
3444          .Case("apsr", 0x0)
3445          .Case("iapsr", 0x1)
3446          .Case("eapsr", 0x2)
3447          .Case("xpsr", 0x3)
3448          .Case("ipsr", 0x5)
3449          .Case("epsr", 0x6)
3450          .Case("iepsr", 0x7)
3451          .Case("msp", 0x8)
3452          .Case("psp", 0x9)
3453          .Case("primask", 0x10)
3454          .Case("basepri", 0x11)
3455          .Case("basepri_max", 0x12)
3456          .Case("faultmask", 0x13)
3457          .Case("control", 0x14)
3458          .Default(-1);
3459}
3460
3461// The flags here are common to those allowed for apsr in the A class cores and
3462// those allowed for the special registers in the M class cores. Returns a
3463// value representing which flags were present, -1 if invalid.
3464static inline int getMClassFlagsMask(StringRef Flags) {
3465  if (Flags.empty())
3466    return 0x3;
3467
3468  return StringSwitch<int>(Flags)
3469          .Case("g", 0x1)
3470          .Case("nzcvq", 0x2)
3471          .Case("nzcvqg", 0x3)
3472          .Default(-1);
3473}
3474
3475static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3476                                 const ARMSubtarget *Subtarget) {
3477  // Ensure that the register (without flags) was a valid M Class special
3478  // register.
3479  int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3480  if (SYSmvalue == -1)
3481    return -1;
3482
3483  // basepri, basepri_max and faultmask are only valid for V7m.
3484  if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3485    return -1;
3486
3487  // If it was a read then we won't be expecting flags and so at this point
3488  // we can return the mask.
3489  if (IsRead) {
3490    assert(Flags.empty() && "Unexpected flags for reading M class register.");
3491    return SYSmvalue;
3492  }
3493
3494  // We know we are now handling a write, so we need the mask for the flags.
3495  int Mask = getMClassFlagsMask(Flags);
3496
3497  // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3498  // shouldn't have flags present.
3499  if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3500    return -1;
3501
3502  // The _g and _nzcvqg versions are only valid if the DSP extension is
3503  // available.
3504  if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
3505    return -1;
3506
3507  // The register was valid, so we need to put the mask in the correct place
3508  // (the flags need to be in bits 11-10) and combine it with the SYSmvalue
3509  // to construct the operand for the instruction node.
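  // For example, writing "apsr_nzcvq" gives SYSmvalue 0x0 and a flags mask of
  // 0x2, so the combined operand is 0x2 << 10 = 0x800.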
3510  if (SYSmvalue < 0x4)
3511    return SYSmvalue | Mask << 10;
3512
3513  return SYSmvalue;
3514}
3515
3516static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3517  // The mask operand encodes in bit 4 (the R bit) whether the register is
3518  // spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and bits 3-0 contain
3519  // the fields to be accessed in the special register, set by the flags
3520  // provided with the register.
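  // For example, "spsr_fc" sets the R bit plus the c and f field bits, giving
  // a mask of 0x10 | 0x1 | 0x8 = 0x19.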
3521  int Mask = 0;
3522  if (Reg == "apsr") {
3523    // The flags permitted for apsr are the same flags that are allowed in
3524    // M class registers. We get the flag value and then shift the flags into
3525    // the correct place to combine with the mask.
3526    Mask = getMClassFlagsMask(Flags);
3527    if (Mask == -1)
3528      return -1;
3529    return Mask << 2;
3530  }
3531
3532  if (Reg != "cpsr" && Reg != "spsr") {
3533    return -1;
3534  }
3535
3536  // This is the same as if the flags were "fc"
3537  if (Flags.empty() || Flags == "all")
3538    return Mask | 0x9;
3539
3540  // Inspect the supplied flags string and set the bits in the mask for
3541  // the relevant and valid flags allowed for cpsr and spsr.
3542  for (char Flag : Flags) {
3543    int FlagVal;
3544    switch (Flag) {
3545      case 'c':
3546        FlagVal = 0x1;
3547        break;
3548      case 'x':
3549        FlagVal = 0x2;
3550        break;
3551      case 's':
3552        FlagVal = 0x4;
3553        break;
3554      case 'f':
3555        FlagVal = 0x8;
3556        break;
3557      default:
3558        FlagVal = 0;
3559    }
3560
3561    // This avoids allowing strings where the same flag bit appears twice.
3562    if (!FlagVal || (Mask & FlagVal))
3563      return -1;
3564    Mask |= FlagVal;
3565  }
3566
3567  // If the register is spsr then we need to set the R bit.
3568  if (Reg == "spsr")
3569    Mask |= 0x10;
3570
3571  return Mask;
3572}
3573
3574// Lower the read_register intrinsic to ARM specific DAG nodes
3575// using the supplied metadata string to select the instruction node to use
3576// and the registers/masks to construct as operands for the node.
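// For example, reading "cpsr" on an A or R class core is lowered to an MRS
// node, while reading "fpscr" is lowered to a VMRS node when VFP is available.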
3577SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N) {
3578  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3579  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3580  bool IsThumb2 = Subtarget->isThumb2();
3581  SDLoc DL(N);
3582
3583  std::vector<SDValue> Ops;
3584  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3585
3586  if (!Ops.empty()) {
3587    // If the special register string was constructed of fields (as defined
3588    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
3589    // MRRC node (64 bit); we can make the distinction based on the number
3590    // of operands we have.
3591    unsigned Opcode;
3592    SmallVector<EVT, 3> ResTypes;
3593    if (Ops.size() == 5){
3594      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3595      ResTypes.append({ MVT::i32, MVT::Other });
3596    } else {
3597      assert(Ops.size() == 3 &&
3598              "Invalid number of fields in special register string.");
3599      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3600      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3601    }
3602
3603    Ops.push_back(getAL(CurDAG, DL));
3604    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3605    Ops.push_back(N->getOperand(0));
3606    return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
3607  }
3608
3609  std::string SpecialReg = RegString->getString().lower();
3610
3611  int BankedReg = getBankedRegisterMask(SpecialReg);
3612  if (BankedReg != -1) {
3613    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3614            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3615            N->getOperand(0) };
3616    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3617                                  DL, MVT::i32, MVT::Other, Ops);
3618  }
3619
3620  // The VFP registers are read by creating SelectionDAG nodes with opcodes
3621  // corresponding to the register that is being read from. So we switch on the
3622  // string to find which opcode we need to use.
3623  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3624                    .Case("fpscr", ARM::VMRS)
3625                    .Case("fpexc", ARM::VMRS_FPEXC)
3626                    .Case("fpsid", ARM::VMRS_FPSID)
3627                    .Case("mvfr0", ARM::VMRS_MVFR0)
3628                    .Case("mvfr1", ARM::VMRS_MVFR1)
3629                    .Case("mvfr2", ARM::VMRS_MVFR2)
3630                    .Case("fpinst", ARM::VMRS_FPINST)
3631                    .Case("fpinst2", ARM::VMRS_FPINST2)
3632                    .Default(0);
3633
3634  // If an opcode was found then we can lower the read to a VFP instruction.
3635  if (Opcode) {
3636    if (!Subtarget->hasVFP2())
3637      return nullptr;
3638    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
3639      return nullptr;
3640
3641    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3642            N->getOperand(0) };
3643    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
3644  }
3645
3646  // If the target is M Class then we need to validate that the register
3647  // string is an acceptable value, so check that a mask can be constructed
3648  // from the string.
3649  if (Subtarget->isMClass()) {
3650    int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
3651    if (SYSmValue == -1)
3652      return nullptr;
3653
3654    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3655                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3656                      N->getOperand(0) };
3657    return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
3658  }
3659
3660  // Here we know the target is not M Class so we need to check if it is one
3661  // of the remaining possible values which are apsr, cpsr or spsr.
3662  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
3663    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3664            N->getOperand(0) };
3665    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
3666                                  MVT::i32, MVT::Other, Ops);
3667  }
3668
3669  if (SpecialReg == "spsr") {
3670    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3671            N->getOperand(0) };
3672    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
3673                                  DL, MVT::i32, MVT::Other, Ops);
3674  }
3675
3676  return nullptr;
3677}
3678
3679// Lower the write_register intrinsic to ARM specific DAG nodes
3680// using the supplied metadata string to select the instruction node to use
3681// and the registers/masks to use in the nodes.
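// For example, writing "fpscr" is lowered to a VMSR node when VFP2 is
// available, and writing a banked register such as "sp_usr" is lowered to an
// MSRbanked node.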
3682SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N) {
3683  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3684  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3685  bool IsThumb2 = Subtarget->isThumb2();
3686  SDLoc DL(N);
3687
3688  std::vector<SDValue> Ops;
3689  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3690
3691  if (!Ops.empty()) {
3692    // If the special register string was constructed of fields (as defined
3693    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
3694    // MCRR node (64 bit); we can make the distinction based on the number
3695    // of operands we have.
3696    unsigned Opcode;
3697    if (Ops.size() == 5) {
3698      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
3699      Ops.insert(Ops.begin()+2, N->getOperand(2));
3700    } else {
3701      assert(Ops.size() == 3 &&
3702              "Invalid number of fields in special register string.");
3703      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
3704      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
3705      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
3706    }
3707
3708    Ops.push_back(getAL(CurDAG, DL));
3709    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3710    Ops.push_back(N->getOperand(0));
3711
3712    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3713  }
3714
3715  std::string SpecialReg = RegString->getString().lower();
3716  int BankedReg = getBankedRegisterMask(SpecialReg);
3717  if (BankedReg != -1) {
3718    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
3719            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3720            N->getOperand(0) };
3721    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
3722                                  DL, MVT::Other, Ops);
3723  }
3724
3725  // The VFP registers are written to by creating SelectionDAG nodes with
3726  // opcodes corresponding to the register that is being written. So we switch
3727  // on the string to find which opcode we need to use.
3728  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3729                    .Case("fpscr", ARM::VMSR)
3730                    .Case("fpexc", ARM::VMSR_FPEXC)
3731                    .Case("fpsid", ARM::VMSR_FPSID)
3732                    .Case("fpinst", ARM::VMSR_FPINST)
3733                    .Case("fpinst2", ARM::VMSR_FPINST2)
3734                    .Default(0);
3735
3736  if (Opcode) {
3737    if (!Subtarget->hasVFP2())
3738      return nullptr;
3739    Ops = { N->getOperand(2), getAL(CurDAG, DL),
3740            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3741    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3742  }
3743
3744  SmallVector<StringRef, 5> Fields;
3745  StringRef(SpecialReg).split(Fields, "_", 1, false);
3746  std::string Reg = Fields[0].str();
3747  StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
3748
3749  // If the target was M Class then we need to validate the special register
3750  // value and retrieve the mask for use in the instruction node.
3751  if (Subtarget->isMClass()) {
3752    // basepri_max gets split so need to correct Reg and Flags.
3753    if (SpecialReg == "basepri_max") {
3754      Reg = SpecialReg;
3755      Flags = "";
3756    }
3757    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
3758    if (SYSmValue == -1)
3759      return nullptr;
3760
3761    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3762                      N->getOperand(2), getAL(CurDAG, DL),
3763                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3764    return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
3765  }
3766
3767  // We then check to see if a valid mask can be constructed for one of the
3768  // register string values permitted for the A and R class cores. These values
3769  // are apsr, spsr and cpsr; these are also valid on older cores.
3770  int Mask = getARClassRegisterMask(Reg, Flags);
3771  if (Mask != -1) {
3772    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
3773            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3774            N->getOperand(0) };
3775    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
3776                                  DL, MVT::Other, Ops);
3777  }
3778
3779  return nullptr;
3780}
3781
3782SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) {
3783  std::vector<SDValue> AsmNodeOperands;
3784  unsigned Flag, Kind;
3785  bool Changed = false;
3786  unsigned NumOps = N->getNumOperands();
3787
3788  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
3789  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
3790  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
3791  // respectively. Since there is no constraint to explicitly specify a
3792  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For Thumb,
3793  // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack
3794  // them into a GPRPair.
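  //
  // For example, asm("ldrexd %0, %H0, [%1]" : "=&r"(V) : "r"(Addr)) needs the
  // i64 output V to live in an even/even+1 register pair, which GPRPair provides.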
3795
3796  SDLoc dl(N);
3797  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
3798                                   : SDValue(nullptr,0);
3799
3800  SmallVector<bool, 8> OpChanged;
3801  // Glue node will be appended late.
3802  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
3803    SDValue op = N->getOperand(i);
3804    AsmNodeOperands.push_back(op);
3805
3806    if (i < InlineAsm::Op_FirstOperand)
3807      continue;
3808
3809    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
3810      Flag = C->getZExtValue();
3811      Kind = InlineAsm::getKind(Flag);
3812    }
3813    else
3814      continue;
3815
3816    // Immediate operands to inline asm in the SelectionDAG are modeled with
3817    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
3818    // the second is a constant with the value of the immediate. If we get here
3819    // and we have a Kind_Imm, skip the next operand, and continue.
3820    if (Kind == InlineAsm::Kind_Imm) {
3821      SDValue op = N->getOperand(++i);
3822      AsmNodeOperands.push_back(op);
3823      continue;
3824    }
3825
3826    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
3827    if (NumRegs)
3828      OpChanged.push_back(false);
3829
3830    unsigned DefIdx = 0;
3831    bool IsTiedToChangedOp = false;
3832    // If it's a use that is tied with a previous def, it has no
3833    // reg class constraint.
3834    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
3835      IsTiedToChangedOp = OpChanged[DefIdx];
3836
3837    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
3838        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
3839      continue;
3840
3841    unsigned RC;
3842    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
3843    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
3844        || NumRegs != 2)
3845      continue;
3846
3847    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
3848    SDValue V0 = N->getOperand(i+1);
3849    SDValue V1 = N->getOperand(i+2);
3850    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
3851    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
3852    SDValue PairedReg;
3853    MachineRegisterInfo &MRI = MF->getRegInfo();
3854
3855    if (Kind == InlineAsm::Kind_RegDef ||
3856        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
3857      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
3858      // the original GPRs.
3859
3860      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3861      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3862      SDValue Chain = SDValue(N,0);
3863
3864      SDNode *GU = N->getGluedUser();
3865      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
3866                                               Chain.getValue(1));
3867
3868      // Extract values from a GPRPair reg and copy to the original GPR reg.
3869      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3870                                                    RegCopy);
3871      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3872                                                    RegCopy);
3873      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
3874                                        RegCopy.getValue(1));
3875      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
3876
3877      // Update the original glue user.
3878      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
3879      Ops.push_back(T1.getValue(1));
3880      CurDAG->UpdateNodeOperands(GU, Ops);
3881    }
3882    else {
3883      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
3884      // GPRPair and then pass the GPRPair to the inline asm.
3885      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
3886
3887      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
3888      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
3889                                          Chain.getValue(1));
3890      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
3891                                          T0.getValue(1));
3892      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
3893
3894      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
3895      // i32 VRs of inline asm with it.
3896      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3897      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3898      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
3899
3900      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
3901      Glue = Chain.getValue(1);
3902    }
3903
3904    Changed = true;
3905
3906    if (PairedReg.getNode()) {
3907      OpChanged[OpChanged.size() - 1] = true;
3908      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
3909      if (IsTiedToChangedOp)
3910        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
3911      else
3912        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
3913      // Replace the current flag.
3914      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
3915          Flag, dl, MVT::i32);
3916      // Add the new register node and skip the original two GPRs.
3917      AsmNodeOperands.push_back(PairedReg);
3918      // Skip the next two GPRs.
3919      i += 2;
3920    }
3921  }
3922
3923  if (Glue.getNode())
3924    AsmNodeOperands.push_back(Glue);
3925  if (!Changed)
3926    return nullptr;
3927
3928  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
3929      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
3930  New->setNodeId(-1);
3931  return New.getNode();
3932}
3933
3934
3935bool ARMDAGToDAGISel::
3936SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3937                             std::vector<SDValue> &OutOps) {
3938  switch (ConstraintID) {
3939  default:
3940    llvm_unreachable("Unexpected asm memory constraint");
3941  case InlineAsm::Constraint_i:
3942    // FIXME: It seems strange that 'i' is needed here since it's supposed to
3943    //        be an immediate and not a memory constraint.
3944    // Fallthrough.
3945  case InlineAsm::Constraint_m:
3946  case InlineAsm::Constraint_Q:
3947  case InlineAsm::Constraint_Um:
3948  case InlineAsm::Constraint_Un:
3949  case InlineAsm::Constraint_Uq:
3950  case InlineAsm::Constraint_Us:
3951  case InlineAsm::Constraint_Ut:
3952  case InlineAsm::Constraint_Uv:
3953  case InlineAsm::Constraint_Uy:
3954    // Require the address to be in a register.  That is safe for all ARM
3955    // variants and it is hard to do anything much smarter without knowing
3956    // how the operand is used.
3957    OutOps.push_back(Op);
3958    return false;
3959  }
3960  return true;
3961}
3962
3963/// createARMISelDag - This pass converts a legalized DAG into a
3964/// ARM-specific DAG, ready for instruction scheduling.
3965///
3966FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3967                                     CodeGenOpt::Level OptLevel) {
3968  return new ARMDAGToDAGISel(TM, OptLevel);
3969}
3970