ARMISelDAGToDAG.cpp revision 360660
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
16#include "MCTargetDesc/ARMAddressingModes.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGISel.h"
25#include "llvm/CodeGen/TargetLowering.h"
26#include "llvm/IR/CallingConv.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/LLVMContext.h"
32#include "llvm/Support/CommandLine.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetOptions.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "arm-isel"
40
// Debugging escape hatch: when set, the matchers below refuse to fold shift
// operations into shifter operands.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
45
46//===--------------------------------------------------------------------===//
47/// ARMDAGToDAGISel - ARM specific code to select ARM machine
48/// instructions for SelectionDAG operations.
49///
50namespace {
51
52class ARMDAGToDAGISel : public SelectionDAGISel {
53  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54  /// make the right decision when generating code for different targets.
55  const ARMSubtarget *Subtarget;
56
57public:
58  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59      : SelectionDAGISel(tm, OptLevel) {}
60
61  bool runOnMachineFunction(MachineFunction &MF) override {
62    // Reset the subtarget each time through.
63    Subtarget = &MF.getSubtarget<ARMSubtarget>();
64    SelectionDAGISel::runOnMachineFunction(MF);
65    return true;
66  }
67
68  StringRef getPassName() const override { return "ARM Instruction Selection"; }
69
70  void PreprocessISelDAG() override;
71
72  /// getI32Imm - Return a target constant of type i32 with the specified
73  /// value.
74  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
76  }
77
78  void Select(SDNode *N) override;
79
80  bool hasNoVMLxHazardUse(SDNode *N) const;
81  bool isShifterOpProfitable(const SDValue &Shift,
82                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83  bool SelectRegShifterOperand(SDValue N, SDValue &A,
84                               SDValue &B, SDValue &C,
85                               bool CheckProfitability = true);
86  bool SelectImmShifterOperand(SDValue N, SDValue &A,
87                               SDValue &B, bool CheckProfitability = true);
88  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89                                    SDValue &B, SDValue &C) {
90    // Don't apply the profitability check
91    return SelectRegShifterOperand(N, A, B, C, false);
92  }
93  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94                                    SDValue &B) {
95    // Don't apply the profitability check
96    return SelectImmShifterOperand(N, A, B, false);
97  }
98
99  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
100
101  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
103
104  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108    return true;
109  }
110
111  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112                             SDValue &Offset, SDValue &Opc);
113  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114                             SDValue &Offset, SDValue &Opc);
115  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116                             SDValue &Offset, SDValue &Opc);
117  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118  bool SelectAddrMode3(SDValue N, SDValue &Base,
119                       SDValue &Offset, SDValue &Opc);
120  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121                             SDValue &Offset, SDValue &Opc);
122  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
123  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
124  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
125  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
126  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
127
128  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
129
130  // Thumb Addressing Modes:
131  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
132  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
133  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
134                                SDValue &OffImm);
135  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
136                                 SDValue &OffImm);
137  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
138                                 SDValue &OffImm);
139  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
140                                 SDValue &OffImm);
141  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
142
143  // Thumb 2 Addressing Modes:
144  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
145  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
146                            SDValue &OffImm);
147  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
148                                 SDValue &OffImm);
149  template<unsigned Shift>
150  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
151                            SDValue &OffImm);
152  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
153                             SDValue &OffReg, SDValue &ShImm);
154  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
155
156  inline bool is_so_imm(unsigned Imm) const {
157    return ARM_AM::getSOImmVal(Imm) != -1;
158  }
159
160  inline bool is_so_imm_not(unsigned Imm) const {
161    return ARM_AM::getSOImmVal(~Imm) != -1;
162  }
163
164  inline bool is_t2_so_imm(unsigned Imm) const {
165    return ARM_AM::getT2SOImmVal(Imm) != -1;
166  }
167
168  inline bool is_t2_so_imm_not(unsigned Imm) const {
169    return ARM_AM::getT2SOImmVal(~Imm) != -1;
170  }
171
172  // Include the pieces autogenerated from the target description.
173#include "ARMGenDAGISel.inc"
174
175private:
176  void transferMemOperands(SDNode *Src, SDNode *Dst);
177
178  /// Indexed (pre/post inc/dec) load matching code for ARM.
179  bool tryARMIndexedLoad(SDNode *N);
180  bool tryT1IndexedLoad(SDNode *N);
181  bool tryT2IndexedLoad(SDNode *N);
182
183  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
184  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
185  /// loads of D registers and even subregs and odd subregs of Q registers.
186  /// For NumVecs <= 2, QOpcodes1 is not used.
187  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
188                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
189                 const uint16_t *QOpcodes1);
190
191  /// SelectVST - Select NEON store intrinsics.  NumVecs should
192  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
193  /// stores of D registers and even subregs and odd subregs of Q registers.
194  /// For NumVecs <= 2, QOpcodes1 is not used.
195  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
196                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
197                 const uint16_t *QOpcodes1);
198
199  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
200  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
201  /// load/store of D registers and Q registers.
202  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
203                       unsigned NumVecs, const uint16_t *DOpcodes,
204                       const uint16_t *QOpcodes);
205
206  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
207  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
208  /// for loading D registers.
209  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
210                    unsigned NumVecs, const uint16_t *DOpcodes,
211                    const uint16_t *QOpcodes0 = nullptr,
212                    const uint16_t *QOpcodes1 = nullptr);
213
214  /// Try to select SBFX/UBFX instructions for ARM.
215  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
216
217  // Select special operations if node forms integer ABS pattern
218  bool tryABSOp(SDNode *N);
219
220  bool tryReadRegister(SDNode *N);
221  bool tryWriteRegister(SDNode *N);
222
223  bool tryInlineAsm(SDNode *N);
224
225  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
226
227  void SelectCMP_SWAP(SDNode *N);
228
229  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
230  /// inline asm expressions.
231  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
232                                    std::vector<SDValue> &OutOps) override;
233
234  // Form pairs of consecutive R, S, D, or Q registers.
235  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
236  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
237  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
238  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
239
240  // Form sequences of 4 consecutive S, D, or Q registers.
241  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
242  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
243  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
244
245  // Get the alignment operand for a NEON VLD or VST instruction.
246  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
247                        bool is64BitVector);
248
249  /// Returns the number of instructions required to materialize the given
250  /// constant in a register, or 3 if a literal pool load is needed.
251  unsigned ConstantMaterializationCost(unsigned Val) const;
252
253  /// Checks if N is a multiplication by a constant where we can extract out a
254  /// power of two from the constant so that it can be used in a shift, but only
255  /// if it simplifies the materialization of the constant. Returns true if it
256  /// is, and assigns to PowerOfTwo the power of two that should be extracted
257  /// out and to NewMulConst the new constant to be multiplied by.
258  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
259                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;
260
261  /// Replace N with M in CurDAG, in a way that also ensures that M gets
262  /// selected when N would have been selected.
263  void replaceDAGValue(const SDValue &N, SDValue M);
264};
265}
266
267/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
268/// operand. If so Imm will receive the 32-bit value.
269static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
270  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
271    Imm = cast<ConstantSDNode>(N)->getZExtValue();
272    return true;
273  }
274  return false;
275}
276
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
282
283// isOpcWithIntImmediate - This method tests to see if the node is a specific
284// opcode and that it has a immediate integer right operand.
285// If so Imm will receive the 32 bit value.
286static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
287  return N->getOpcode() == Opc &&
288         isInt32Immediate(N->getOperand(1).getNode(), Imm);
289}
290
291/// Check whether a particular node is a constant value representable as
292/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
293///
294/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
295static bool isScaledConstantInRange(SDValue Node, int Scale,
296                                    int RangeMin, int RangeMax,
297                                    int &ScaledConstant) {
298  assert(Scale > 0 && "Invalid scale!");
299
300  // Check that this is a constant.
301  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
302  if (!C)
303    return false;
304
305  ScaledConstant = (int) C->getZExtValue();
306  if ((ScaledConstant % Scale) != 0)
307    return false;
308
309  ScaledConstant /= Scale;
310  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
311}
312
/// Pre-isel pass over the whole DAG: rewrite (add X, (and (srl Y, c1), c2))
/// into a form where the mask/shift pair can be matched as a bitfield
/// extract (UBFX) plus a shifter operand on the add.  Only runs on v6t2+
/// subtargets, which have UBFX.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): this queries isThumb(), not isThumb2(); since hasV6T2Ops()
  // already passed, a Thumb target here is necessarily Thumb2-capable.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    // Canonicalize so that the (and ... c2) operand, if any, is in N1.
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // New srl shifts TZ further so the subsequent shl restores the bits.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    // Rewrite the add in place; the DAG combiner/isel will pick up the new
    // operands.
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
395
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 there is no scheduling to protect; always allow the MLA/MLS.
  if (OptLevel == CodeGenOpt::None)
    return true;

  // Only subtargets that actually exhibit the hazard need the check.
  if (!Subtarget->hasVMLxHazards())
    return true;

  // With multiple users we cannot reason about the consumer; be conservative.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    // Stores and FP->GPR moves consume the result without the RAW hazard.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
440
441bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
442                                            ARM_AM::ShiftOpc ShOpcVal,
443                                            unsigned ShAmt) {
444  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
445    return true;
446  if (Shift.hasOneUse())
447    return true;
448  // R << 2 is free.
449  return ShOpcVal == ARM_AM::lsl &&
450         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
451}
452
453unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
454  if (Subtarget->isThumb()) {
455    if (Val <= 255) return 1;                               // MOV
456    if (Subtarget->hasV6T2Ops() &&
457        (Val <= 0xffff ||                                   // MOV
458         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
459         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
460      return 1;
461    if (Val <= 510) return 2;                               // MOV + ADDi8
462    if (~Val <= 255) return 2;                              // MOV + MVN
463    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
464  } else {
465    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
466    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
467    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
468    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
469  }
470  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
471  return 3; // Literal pool load
472}
473
474bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
475                                             unsigned MaxShift,
476                                             unsigned &PowerOfTwo,
477                                             SDValue &NewMulConst) const {
478  assert(N.getOpcode() == ISD::MUL);
479  assert(MaxShift > 0);
480
481  // If the multiply is used in more than one place then changing the constant
482  // will make other uses incorrect, so don't.
483  if (!N.hasOneUse()) return false;
484  // Check if the multiply is by a constant
485  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
486  if (!MulConst) return false;
487  // If the constant is used in more than one place then modifying it will mean
488  // we need to materialize two constants instead of one, which is a bad idea.
489  if (!MulConst->hasOneUse()) return false;
490  unsigned MulConstVal = MulConst->getZExtValue();
491  if (MulConstVal == 0) return false;
492
493  // Find the largest power of 2 that MulConstVal is a multiple of
494  PowerOfTwo = MaxShift;
495  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
496    --PowerOfTwo;
497    if (PowerOfTwo == 0) return false;
498  }
499
500  // Only optimise if the new cost is better
501  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
502  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
503  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
504  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
505  return NewCost < OldCost;
506}
507
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M to N's place in the topological order before redirecting uses, so
  // the selector still visits the replacement node.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
512
/// Match N as a register shifted by an immediate, producing the base register
/// and the encoded shift-op immediate for an ARM shifter operand.  Also
/// handles multiply-by-constant when a shift can profitably be peeled off.
/// NOTE(review): CheckProfitability is accepted for interface symmetry with
/// SelectRegShifterOperand but is not referenced in this body.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Hold onto N across the constant replacement; replaceDAGValue may CSE
      // and invalidate the original SDValue.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // This matcher only handles immediate shift amounts; register amounts are
  // handled by SelectRegShifterOperand.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
551
552bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
553                                              SDValue &BaseReg,
554                                              SDValue &ShReg,
555                                              SDValue &Opc,
556                                              bool CheckProfitability) {
557  if (DisableShifterOp)
558    return false;
559
560  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
561
562  // Don't match base register only case. That is matched to a separate
563  // lower complexity pattern with explicit register operand.
564  if (ShOpcVal == ARM_AM::no_shift) return false;
565
566  BaseReg = N.getOperand(0);
567  unsigned ShImmVal = 0;
568  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
569  if (RHS) return false;
570
571  ShReg = N.getOperand(1);
572  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
573    return false;
574  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
575                                  SDLoc(N), MVT::i32);
576  return true;
577}
578
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD exactly when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
586
587
/// Match an address of the form [reg +/- imm12] (ARM LDRi12/STRi12 style),
/// producing the base and a signed 12-bit target-constant offset.  Always
/// succeeds: anything unmatched falls through to base-only with offset 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Strip ARMISD::Wrapper, but not around target addresses that must stay
    // wrapped (globals, external symbols, TLS addresses).
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // Fold SUB by negating the offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
638
639
640
/// Match an address of the form [reg +/- reg, shift] (ARM addressing mode 2
/// with a scaled register offset), producing base, offset register, and the
/// AM2 opcode immediate encoding add/sub, shift amount and shift kind.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // On cores where shifter operands may be costly, only rewrite a multiply
  // with a single use.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Base and offset are the same register: X +/- (X << ShAmt).
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Unprofitable: fall back to an unshifted register offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // Keep Offset alive across the constant replacement (CSE may fold it).
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
748
/// Match the offset operand of a pre/post-indexed load/store as a (possibly
/// shifted) register for addressing mode 2.  Rejects offsets that fit the
/// 12-bit immediate form (those are handled by the Imm variants).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment modes add the offset; decrement modes subtract it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // A small constant offset should use the immediate form instead.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Unprofitable: use the whole value unshifted.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
784
/// Select a 12-bit immediate offset for the *pre*-indexed addrmode2
/// instructions (e.g. LDR_PRE_IMM / LDRB_PRE_IMM), which take a plain signed
/// immediate rather than a packed AM2 opcode. On success, Offset is the zero
/// register and Opc is the (possibly negated) immediate.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    // The pre-indexed immediate forms encode the sign in the value itself.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
803
804
/// Select a 12-bit immediate offset for the post-indexed addrmode2
/// instructions (e.g. LDR_POST_IMM). Unlike the pre-indexed variant above,
/// the immediate and add/sub direction are packed into an AM2 opcode.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
824
/// Trivial addressing mode: the address is used as-is with no offset.
/// Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
829
/// Select ARM addressing mode 3 (base +/- register, or base +/- imm8), used
/// by halfword / signed-byte / doubleword loads and stores. Emits a base, an
/// offset register (reg0 when an immediate is folded), and the packed AM3
/// opcode. Always succeeds: if nothing can be folded, the whole expression
/// becomes the base.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No foldable offset: use the whole expression as the base, lowering a
    // frame index to its target form.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // AM3 encodes magnitude plus an add/sub flag rather than a signed value.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Offset out of immediate range: fall back to the register-offset form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
883
/// Select the offset operands for a pre/post-indexed addrmode3 load/store.
/// A constant increment in [0, 256) is folded into the immediate form
/// (Offset becomes reg0); otherwise the register form is used with a zero
/// immediate. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
905
/// Shared worker for VFP addressing mode 5: base register plus an imm8
/// offset scaled by 4 (or by 2 when \p FP16 is set, for the FP16 load/store
/// encodings). Always succeeds — when no constant can be folded the whole
/// expression becomes the base with a zero offset.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, but only for operand kinds that are safe to use
      // directly as a base here.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // AM5 encodes magnitude plus an add/sub flag rather than a signed value.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant out of range: use the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
964
/// Addressing mode 5 for full-width VFP accesses (imm8 scaled by 4).
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
969
/// Addressing mode 5 for FP16 VFP accesses (imm8 scaled by 2).
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
974
/// Select addressing mode 6 (NEON vector load/store): the address is used
/// directly, and an alignment operand is computed from the memory operand of
/// \p Parent. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1003
1004bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1005                                            SDValue &Offset) {
1006  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1007  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1008  if (AM != ISD::POST_INC)
1009    return false;
1010  Offset = N;
1011  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1012    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1013      Offset = CurDAG->getRegister(0, MVT::i32);
1014  }
1015  return true;
1016}
1017
1018bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1019                                       SDValue &Offset, SDValue &Label) {
1020  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1021    Offset = N.getOperand(0);
1022    SDValue N1 = N.getOperand(1);
1023    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1024                                      SDLoc(N), MVT::i32);
1025    return true;
1026  }
1027
1028  return false;
1029}
1030
1031
1032//===----------------------------------------------------------------------===//
1033//                         Thumb Addressing Modes
1034//===----------------------------------------------------------------------===//
1035
1036static bool shouldUseZeroOffsetLdSt(SDValue N) {
1037  // Negative numbers are difficult to materialise in thumb1. If we are
1038  // selecting the add of a negative, instead try to select ri with a zero
1039  // offset, so create the add node directly which will become a sub.
1040  if (N.getOpcode() != ISD::ADD)
1041    return false;
1042
1043  // Look for an imm which is not legal for ld/st, but is legal for sub.
1044  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1045    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1046
1047  return false;
1048}
1049
1050bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1051                                                SDValue &Offset) {
1052  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1053    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1054    if (!NC || !NC->isNullValue())
1055      return false;
1056
1057    Base = Offset = N;
1058    return true;
1059  }
1060
1061  Base = N.getOperand(0);
1062  Offset = N.getOperand(1);
1063  return true;
1064}
1065
1066bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1067                                            SDValue &Offset) {
1068  if (shouldUseZeroOffsetLdSt(N))
1069    return false; // Select ri instead
1070  return SelectThumbAddrModeRRSext(N, Base, Offset);
1071}
1072
/// Select the Thumb base + (imm5 * Scale) addressing mode, where \p Scale is
/// the access size in bytes (1, 2 or 4 — see the SelectThumbAddrModeImm5S*
/// wrappers). Fails when a register-offset or ri-with-zero-offset form is
/// preferable.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Select the add-of-a-negative as base with zero offset; the add itself
    // will be selected as a sub.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper for operand kinds safe to use directly as a base.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1110
/// Thumb base + imm5 addressing for 4-byte (word) accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1116
/// Thumb base + imm5 addressing for 2-byte (halfword) accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1122
/// Thumb base + imm5 addressing for 1-byte accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1128
/// Select the Thumb SP-relative addressing mode: a frame index, optionally
/// plus an imm8 scaled by 4. As a side effect, may raise the alignment of
/// the frame object to 4 so the word-scaled offset encoding is valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1174
1175
1176//===----------------------------------------------------------------------===//
1177//                        Thumb 2 Addressing Modes
1178//===----------------------------------------------------------------------===//
1179
1180
/// Select the Thumb2 base + imm12 (unsigned) addressing mode used by
/// t2LDRi12/t2STRi12. Fails for (R - imm8), which is left for t2LDRi8, and
/// for constant-pool wrappers, which should select t2LDRpci instead.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1236
/// Select the Thumb2 base - imm8 addressing mode (t2LDRi8/t2STRi8). Only
/// strictly negative offsets in [-255, 0) are matched; non-negative offsets
/// belong to the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1263
/// Select an 8-bit immediate increment for a pre/post-indexed Thumb2
/// load/store. The immediate is negated for the decrement modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
1280
/// Select a Thumb2 base + (imm7 << Shift) addressing mode (used by MVE
/// loads/stores). The offset must be a multiple of 1 << Shift fitting in a
/// signed 7-bit field after scaling; otherwise the whole expression is used
/// as the base with a zero offset, so this always succeeds.
template<unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB ||
      CurDAG->isBaseWithConstantOffset(N)) {
    if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;

      if (isShiftedInt<7, Shift>(RHSC)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
        }
        OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
        return true;
      }
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1309
/// Select the Thumb2 register + (register << [0,3]) addressing mode.
/// Constant offsets that fit imm12 or -imm8 are rejected so the immediate
/// selectors handle them.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shift amounts 0-3 are encodable; larger or unprofitable shifts
      // leave the shift node as the offset register.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1369
/// Select the addressing mode for Thumb2 ldrex/strex: base plus an optional
/// word-aligned offset of at most 1020, encoded divided by 4. This *must*
/// succeed, so any unfoldable offset falls back to base-only with offset 0.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset in words.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1398
1399//===--------------------------------------------------------------------===//
1400
/// getAL - Returns an ARMCC::AL ("always execute") condition-code immediate
/// node, used as the predicate operand of unconditional instructions.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1405
/// Copy the memory operand of the original memory node \p N onto the newly
/// selected machine node \p Result so alias/volatility info is preserved.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1410
/// Try to select an ARM-mode pre/post-indexed load for \p N. Chooses between
/// the immediate and register addrmode2/addrmode3 forms based on the loaded
/// type (i32, i16, i8/i1) and extension kind, then replaces \p N with the
/// machine node. Returns true on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // Prefer immediate forms before falling back to register offsets.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword accesses use addrmode3 (LDRH/LDRSH).
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // i1 is loaded as a byte. Sign-extending byte loads use addrmode3;
    // zero/any-extending ones use addrmode2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The _PRE_IMM forms take the immediate in AMOpc and no separate
      // offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1489
/// Try to select a Thumb1 post-indexed i32 load. Only the non-extending
/// POST_INC case with a constant increment of exactly 4 is representable
/// (as a single-register LDM); everything else fails.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
1516
/// Try to select a Thumb2 pre/post-indexed load for \p N. Requires the
/// increment to fit the 8-bit immediate form; picks the opcode by loaded
/// type (i32, i16, i8/i1) and sign/zero extension, then replaces \p N.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      // i1 is loaded as a byte.
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1567
1568/// Form a GPRPair pseudo register from a pair of GPR regs.
1569SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1570  SDLoc dl(V0.getNode());
1571  SDValue RegClass =
1572    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1573  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1574  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1575  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1576  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1577}
1578
1579/// Form a D register from a pair of S registers.
1580SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1581  SDLoc dl(V0.getNode());
1582  SDValue RegClass =
1583    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1584  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1585  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1586  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1587  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1588}
1589
1590/// Form a quad register from a pair of D registers.
1591SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1592  SDLoc dl(V0.getNode());
1593  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1594                                               MVT::i32);
1595  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1596  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1597  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1599}
1600
1601/// Form 4 consecutive D registers from a pair of Q registers.
1602SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603  SDLoc dl(V0.getNode());
1604  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1605                                               MVT::i32);
1606  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1607  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1608  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1610}
1611
1612/// Form 4 consecutive S registers.
1613SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1614                                   SDValue V2, SDValue V3) {
1615  SDLoc dl(V0.getNode());
1616  SDValue RegClass =
1617    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1618  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1619  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1620  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1621  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1622  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1623                                    V2, SubReg2, V3, SubReg3 };
1624  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1625}
1626
1627/// Form 4 consecutive D registers.
1628SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1629                                   SDValue V2, SDValue V3) {
1630  SDLoc dl(V0.getNode());
1631  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632                                               MVT::i32);
1633  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1634  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1635  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1636  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1637  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1638                                    V2, SubReg2, V3, SubReg3 };
1639  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1640}
1641
1642/// Form 4 consecutive Q registers.
1643SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1644                                   SDValue V2, SDValue V3) {
1645  SDLoc dl(V0.getNode());
1646  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1647                                               MVT::i32);
1648  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1649  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1650  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1651  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1652  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1653                                    V2, SubReg2, V3, SubReg3 };
1654  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1655}
1656
1657/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1658/// of a NEON VLD or VST instruction.  The supported values depend on the
1659/// number of registers being loaded.
1660SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1661                                       unsigned NumVecs, bool is64BitVector) {
1662  unsigned NumRegs = NumVecs;
1663  if (!is64BitVector && NumVecs < 3)
1664    NumRegs *= 2;
1665
1666  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1667  if (Alignment >= 32 && NumRegs == 4)
1668    Alignment = 32;
1669  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1670    Alignment = 16;
1671  else if (Alignment >= 8)
1672    Alignment = 8;
1673  else
1674    Alignment = 0;
1675
1676  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1677}
1678
1679static bool isVLDfixed(unsigned Opc)
1680{
1681  switch (Opc) {
1682  default: return false;
1683  case ARM::VLD1d8wb_fixed : return true;
1684  case ARM::VLD1d16wb_fixed : return true;
1685  case ARM::VLD1d64Qwb_fixed : return true;
1686  case ARM::VLD1d32wb_fixed : return true;
1687  case ARM::VLD1d64wb_fixed : return true;
1688  case ARM::VLD1d64TPseudoWB_fixed : return true;
1689  case ARM::VLD1d64QPseudoWB_fixed : return true;
1690  case ARM::VLD1q8wb_fixed : return true;
1691  case ARM::VLD1q16wb_fixed : return true;
1692  case ARM::VLD1q32wb_fixed : return true;
1693  case ARM::VLD1q64wb_fixed : return true;
1694  case ARM::VLD1DUPd8wb_fixed : return true;
1695  case ARM::VLD1DUPd16wb_fixed : return true;
1696  case ARM::VLD1DUPd32wb_fixed : return true;
1697  case ARM::VLD1DUPq8wb_fixed : return true;
1698  case ARM::VLD1DUPq16wb_fixed : return true;
1699  case ARM::VLD1DUPq32wb_fixed : return true;
1700  case ARM::VLD2d8wb_fixed : return true;
1701  case ARM::VLD2d16wb_fixed : return true;
1702  case ARM::VLD2d32wb_fixed : return true;
1703  case ARM::VLD2q8PseudoWB_fixed : return true;
1704  case ARM::VLD2q16PseudoWB_fixed : return true;
1705  case ARM::VLD2q32PseudoWB_fixed : return true;
1706  case ARM::VLD2DUPd8wb_fixed : return true;
1707  case ARM::VLD2DUPd16wb_fixed : return true;
1708  case ARM::VLD2DUPd32wb_fixed : return true;
1709  }
1710}
1711
1712static bool isVSTfixed(unsigned Opc)
1713{
1714  switch (Opc) {
1715  default: return false;
1716  case ARM::VST1d8wb_fixed : return true;
1717  case ARM::VST1d16wb_fixed : return true;
1718  case ARM::VST1d32wb_fixed : return true;
1719  case ARM::VST1d64wb_fixed : return true;
1720  case ARM::VST1q8wb_fixed : return true;
1721  case ARM::VST1q16wb_fixed : return true;
1722  case ARM::VST1q32wb_fixed : return true;
1723  case ARM::VST1q64wb_fixed : return true;
1724  case ARM::VST1d64TPseudoWB_fixed : return true;
1725  case ARM::VST1d64QPseudoWB_fixed : return true;
1726  case ARM::VST2d8wb_fixed : return true;
1727  case ARM::VST2d16wb_fixed : return true;
1728  case ARM::VST2d32wb_fixed : return true;
1729  case ARM::VST2q8PseudoWB_fixed : return true;
1730  case ARM::VST2q16PseudoWB_fixed : return true;
1731  case ARM::VST2q32PseudoWB_fixed : return true;
1732  }
1733}
1734
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The mapping is purely mechanical: each "*_fixed" opcode is replaced by
// its "*_register" counterpart.  Opcodes without a register-stride form
// (asserted to be fixed-stride on entry) fall through and are returned
// unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1792
1793/// Returns true if the given increment is a Constant known to be equal to the
1794/// access size performed by a NEON load/store. This means the "[rN]!" form can
1795/// be used.
1796static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1797  auto C = dyn_cast<ConstantSDNode>(Inc);
1798  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1799}
1800
/// Select a NEON vector load (VLD1-VLD4), covering both intrinsic nodes and
/// their updating (post-increment) counterparts.  The three opcode tables are
/// indexed by element size (see OpcodeIndex below): DOpcodes for D-register
/// forms, QOpcodes0 for directly supported Q-register forms (or the first,
/// even-register half of a split VLD3/VLD4), QOpcodes1 for the second,
/// odd-register half of a split VLD3/VLD4.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  // Intrinsic nodes have the intrinsic ID at operand 1, so the address is one
  // operand further along than on an updating node.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type's element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // The machine node produces one wide super-register; VLD3 results are
  // widened to the 4-register type (the extra register is simply unused).
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // Written-back base address.
  ResTys.push_back(MVT::Other);  // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    // Standard ARM predication operands (always-execute), then the chain.
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, chained through the even load's results:
    // result 1 is the updated address, result 0 the partial super-register.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.  Each of N's vector results is replaced by
  // the corresponding D/Q subregister of the load's super-register result.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
1939
/// Select a NEON vector store (VST1-VST4), covering both intrinsic nodes and
/// their updating (post-increment) counterparts.  The opcode tables are
/// indexed by element size (see OpcodeIndex below): DOpcodes for D-register
/// forms, QOpcodes0 for directly supported Q-register forms (or the first,
/// even-register half of a split VST3/VST4), QOpcodes1 for the second,
/// odd-register half of a split VST3/VST4.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The vector type is taken from the first value operand being stored.
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type's element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // Written-back base address.
  ResTys.push_back(MVT::Other);  // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Gather the values being stored into a single source super-register.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    // Standard ARM predication operands (always-execute), then the chain.
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.  For a vst3 the last register is an undef.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers, using the even store's updated address
  // (result 0) as the base.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2090
/// Select a NEON single-lane load or store (VLD2LN-VLD4LN / VST2LN-VST4LN).
/// IsLoad distinguishes loads from stores; DOpcodes/QOpcodes are opcode
/// tables indexed by element size for D- and Q-register forms respectively.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand following the vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to the total number of bytes accessed, require at
  // least 8, and force it to a power of two; otherwise encode 0 (none).
  // NumVecs == 3 has no alignment operand to encode.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type's element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Loads produce one wide super-register; a 3-vector result is widened
    // to the 4-register type.
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // Written-back base address.
  ResTys.push_back(MVT::Other);  // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    // Reg0 here selects the fixed (post-increment by access size) form.
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the input vectors into one super-register; a 3-vector group gets
  // an undef fourth register.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  // Standard ARM predication operands (always-execute), then the chain.
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.  Each of N's vector results is replaced by the
  // corresponding D/Q subregister of the load's super-register result.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2217
/// Select a NEON load-and-duplicate (VLD1DUP-VLD4DUP).  DOpcodes holds the
/// D-register forms; QOpcodes0/QOpcodes1 hold the Q-register forms used by
/// the one- or two-instruction quad lowerings below.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Intrinsic nodes have the intrinsic ID at operand 1, so the address is one
  // operand further along.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to the total number of bytes accessed, require at
  // least 8, and force it to a power of two; otherwise encode 0 (none).
  // NumVecs == 3 has no alignment operand to encode.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type's element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The machine node produces one wide super-register; a 3-vector result is
  // widened to the 4-register type (the extra register is unused).
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // Written-back base address.
  ResTys.push_back(MVT::Other);  // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // Directly supported by a single instruction.
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Quad VLD2DUP: two chained instructions with the same operand shape.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Quad VLD3/VLD4 DUP: the second instruction takes the first's partial
    // super-register result as an input.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
2349
/// Try to match N (an AND, SRL/SRA, or SIGN_EXTEND_INREG node) as a bitfield
/// extract and select it to UBFX/SBFX (or their Thumb2 equivalents), or to a
/// plain right shift when the field reaches the top of the register. Only
/// available from ARMv6T2, which introduced the BFX instructions. Returns
/// true if N was selected here; false lets the autogenerated matcher run.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  // Default opcode: signed/unsigned bitfield extract for the current ISA.
  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask:
  // (and (srl X, lsb), mask) where mask covers the low bits.
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        // General case: BFX source, lsb, width-1, plus predicate operands.
        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift:
  // (srl/sra (shl X, c2), c1) extracts bits [31-c2 .. c1-c2] of X.
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative lsb would mean the field starts "below" bit 0 of X, which
      // a BFX cannot express.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand:
  // (srl/sra (and X, mask), c) where mask is a shifted mask whose lowest set
  // bit is exactly at the shift amount c.
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Finally, (sext_inreg (srl/sra X, c)) is a signed extract of the inreg
  // type's width starting at bit c. Here LSB receives the shift amount.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
2485
2486/// Target-specific DAG combining for ISD::XOR.
2487/// Target-independent combining lowers SELECT_CC nodes of the form
2488/// select_cc setg[ge] X,  0,  X, -X
2489/// select_cc setgt    X, -1,  X, -X
2490/// select_cc setl[te] X,  0, -X,  X
2491/// select_cc setlt    X,  1, -X,  X
2492/// which represent Integer ABS into:
2493/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2494/// ARM instruction selection detects the latter and matches it to
2495/// ARM::ABS or ARM::t2ABS machine node.
2496bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2497  SDValue XORSrc0 = N->getOperand(0);
2498  SDValue XORSrc1 = N->getOperand(1);
2499  EVT VT = N->getValueType(0);
2500
2501  if (Subtarget->isThumb1Only())
2502    return false;
2503
2504  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2505    return false;
2506
2507  SDValue ADDSrc0 = XORSrc0.getOperand(0);
2508  SDValue ADDSrc1 = XORSrc0.getOperand(1);
2509  SDValue SRASrc0 = XORSrc1.getOperand(0);
2510  SDValue SRASrc1 = XORSrc1.getOperand(1);
2511  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2512  EVT XType = SRASrc0.getValueType();
2513  unsigned Size = XType.getSizeInBits() - 1;
2514
2515  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2516      XType.isInteger() && SRAConstant != nullptr &&
2517      Size == SRAConstant->getZExtValue()) {
2518    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2519    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2520    return true;
2521  }
2522
2523  return false;
2524}
2525
2526/// We've got special pseudo-instructions for these
2527void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2528  unsigned Opcode;
2529  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2530  if (MemTy == MVT::i8)
2531    Opcode = ARM::CMP_SWAP_8;
2532  else if (MemTy == MVT::i16)
2533    Opcode = ARM::CMP_SWAP_16;
2534  else if (MemTy == MVT::i32)
2535    Opcode = ARM::CMP_SWAP_32;
2536  else
2537    llvm_unreachable("Unknown AtomicCmpSwap type");
2538
2539  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2540                   N->getOperand(0)};
2541  SDNode *CmpSwap = CurDAG->getMachineNode(
2542      Opcode, SDLoc(N),
2543      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2544
2545  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2546  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2547
2548  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2549  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2550  CurDAG->RemoveDeadNode(N);
2551}
2552
2553static Optional<std::pair<unsigned, unsigned>>
2554getContiguousRangeOfSetBits(const APInt &A) {
2555  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2556  unsigned LastOne = A.countTrailingZeros();
2557  if (A.countPopulation() != (FirstOne - LastOne + 1))
2558    return Optional<std::pair<unsigned,unsigned>>();
2559  return std::make_pair(FirstOne, LastOne);
2560}
2561
/// Try to replace the AND feeding a CMPZ with flag-setting shifts so the
/// AND immediate never needs to be materialized. Sets SwitchEQNEToPLMI when
/// the caller must rewrite an EQ/NE use of the flags into PL/MI (case 3
/// below, where the tested bit ends up in the sign bit). Note: this rewrites
/// the AND operand in place; it does not select N itself.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // Other users of the AND would still need its original value, so the
  // transform only pays off when the CMPZ is the sole user.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Only masks whose set bits are one contiguous run can be removed with at
  // most two shifts.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting left/right shift of Src by Imm. Opc is given as the
  // Thumb1 opcode (tLSLri/tLSRri) and translated to the Thumb2 form when
  // needed; the two forms take differently ordered operand lists (Thumb1
  // lists CPSR first, Thumb2 appends the optional cc_out-style operands).
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  // Range->first is the index of the highest set bit of the mask,
  // Range->second the lowest.
  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}
2633
2634void ARMDAGToDAGISel::Select(SDNode *N) {
2635  SDLoc dl(N);
2636
2637  if (N->isMachineOpcode()) {
2638    N->setNodeId(-1);
2639    return;   // Already selected.
2640  }
2641
2642  switch (N->getOpcode()) {
2643  default: break;
2644  case ISD::STORE: {
2645    // For Thumb1, match an sp-relative store in C++. This is a little
2646    // unfortunate, but I don't think I can make the chain check work
2647    // otherwise.  (The chain of the store has to be the same as the chain
2648    // of the CopyFromReg, or else we can't replace the CopyFromReg with
2649    // a direct reference to "SP".)
2650    //
2651    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2652    // a different addressing mode from other four-byte stores.
2653    //
2654    // This pattern usually comes up with call arguments.
2655    StoreSDNode *ST = cast<StoreSDNode>(N);
2656    SDValue Ptr = ST->getBasePtr();
2657    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2658      int RHSC = 0;
2659      if (Ptr.getOpcode() == ISD::ADD &&
2660          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2661        Ptr = Ptr.getOperand(0);
2662
2663      if (Ptr.getOpcode() == ISD::CopyFromReg &&
2664          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2665          Ptr.getOperand(0) == ST->getChain()) {
2666        SDValue Ops[] = {ST->getValue(),
2667                         CurDAG->getRegister(ARM::SP, MVT::i32),
2668                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2669                         getAL(CurDAG, dl),
2670                         CurDAG->getRegister(0, MVT::i32),
2671                         ST->getChain()};
2672        MachineSDNode *ResNode =
2673            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2674        MachineMemOperand *MemOp = ST->getMemOperand();
2675        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2676        ReplaceNode(N, ResNode);
2677        return;
2678      }
2679    }
2680    break;
2681  }
2682  case ISD::WRITE_REGISTER:
2683    if (tryWriteRegister(N))
2684      return;
2685    break;
2686  case ISD::READ_REGISTER:
2687    if (tryReadRegister(N))
2688      return;
2689    break;
2690  case ISD::INLINEASM:
2691  case ISD::INLINEASM_BR:
2692    if (tryInlineAsm(N))
2693      return;
2694    break;
2695  case ISD::XOR:
2696    // Select special operations if XOR node forms integer ABS pattern
2697    if (tryABSOp(N))
2698      return;
2699    // Other cases are autogenerated.
2700    break;
2701  case ISD::Constant: {
2702    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2703    // If we can't materialize the constant we need to use a literal pool
2704    if (ConstantMaterializationCost(Val) > 2) {
2705      SDValue CPIdx = CurDAG->getTargetConstantPool(
2706          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2707          TLI->getPointerTy(CurDAG->getDataLayout()));
2708
2709      SDNode *ResNode;
2710      if (Subtarget->isThumb()) {
2711        SDValue Ops[] = {
2712          CPIdx,
2713          getAL(CurDAG, dl),
2714          CurDAG->getRegister(0, MVT::i32),
2715          CurDAG->getEntryNode()
2716        };
2717        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2718                                         Ops);
2719      } else {
2720        SDValue Ops[] = {
2721          CPIdx,
2722          CurDAG->getTargetConstant(0, dl, MVT::i32),
2723          getAL(CurDAG, dl),
2724          CurDAG->getRegister(0, MVT::i32),
2725          CurDAG->getEntryNode()
2726        };
2727        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2728                                         Ops);
2729      }
2730      // Annotate the Node with memory operand information so that MachineInstr
2731      // queries work properly. This e.g. gives the register allocation the
2732      // required information for rematerialization.
2733      MachineFunction& MF = CurDAG->getMachineFunction();
2734      MachineMemOperand *MemOp =
2735          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2736                                  MachineMemOperand::MOLoad, 4, 4);
2737
2738      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2739
2740      ReplaceNode(N, ResNode);
2741      return;
2742    }
2743
2744    // Other cases are autogenerated.
2745    break;
2746  }
2747  case ISD::FrameIndex: {
2748    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2749    int FI = cast<FrameIndexSDNode>(N)->getIndex();
2750    SDValue TFI = CurDAG->getTargetFrameIndex(
2751        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2752    if (Subtarget->isThumb1Only()) {
2753      // Set the alignment of the frame object to 4, to avoid having to generate
2754      // more than one ADD
2755      MachineFrameInfo &MFI = MF->getFrameInfo();
2756      if (MFI.getObjectAlignment(FI) < 4)
2757        MFI.setObjectAlignment(FI, 4);
2758      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2759                           CurDAG->getTargetConstant(0, dl, MVT::i32));
2760      return;
2761    } else {
2762      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2763                      ARM::t2ADDri : ARM::ADDri);
2764      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2765                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2766                        CurDAG->getRegister(0, MVT::i32) };
2767      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2768      return;
2769    }
2770  }
2771  case ISD::SRL:
2772    if (tryV6T2BitfieldExtractOp(N, false))
2773      return;
2774    break;
2775  case ISD::SIGN_EXTEND_INREG:
2776  case ISD::SRA:
2777    if (tryV6T2BitfieldExtractOp(N, true))
2778      return;
2779    break;
2780  case ISD::MUL:
2781    if (Subtarget->isThumb1Only())
2782      break;
2783    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2784      unsigned RHSV = C->getZExtValue();
2785      if (!RHSV) break;
2786      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2787        unsigned ShImm = Log2_32(RHSV-1);
2788        if (ShImm >= 32)
2789          break;
2790        SDValue V = N->getOperand(0);
2791        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2792        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2793        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2794        if (Subtarget->isThumb()) {
2795          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2796          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2797          return;
2798        } else {
2799          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2800                            Reg0 };
2801          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2802          return;
2803        }
2804      }
2805      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2806        unsigned ShImm = Log2_32(RHSV+1);
2807        if (ShImm >= 32)
2808          break;
2809        SDValue V = N->getOperand(0);
2810        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2811        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2812        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2813        if (Subtarget->isThumb()) {
2814          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2815          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2816          return;
2817        } else {
2818          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2819                            Reg0 };
2820          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2821          return;
2822        }
2823      }
2824    }
2825    break;
2826  case ISD::AND: {
2827    // Check for unsigned bitfield extract
2828    if (tryV6T2BitfieldExtractOp(N, false))
2829      return;
2830
2831    // If an immediate is used in an AND node, it is possible that the immediate
2832    // can be more optimally materialized when negated. If this is the case we
2833    // can negate the immediate and use a BIC instead.
2834    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2835    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2836      uint32_t Imm = (uint32_t) N1C->getZExtValue();
2837
2838      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2839      // immediate can be negated and fit in the immediate operand of
2840      // a t2BIC, don't do any manual transform here as this can be
2841      // handled by the generic ISel machinery.
2842      bool PreferImmediateEncoding =
2843        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2844      if (!PreferImmediateEncoding &&
2845          ConstantMaterializationCost(Imm) >
2846              ConstantMaterializationCost(~Imm)) {
2847        // The current immediate costs more to materialize than a negated
2848        // immediate, so negate the immediate and use a BIC.
2849        SDValue NewImm =
2850          CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2851        // If the new constant didn't exist before, reposition it in the topological
2852        // ordering so it is just before N. Otherwise, don't touch its location.
2853        if (NewImm->getNodeId() == -1)
2854          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2855
2856        if (!Subtarget->hasThumb2()) {
2857          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2858                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
2859                           CurDAG->getRegister(0, MVT::i32)};
2860          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2861          return;
2862        } else {
2863          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2864                           CurDAG->getRegister(0, MVT::i32),
2865                           CurDAG->getRegister(0, MVT::i32)};
2866          ReplaceNode(N,
2867                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2868          return;
2869        }
2870      }
2871    }
2872
2873    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2874    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2875    // are entirely contributed by c2 and lower 16-bits are entirely contributed
2876    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2877    // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2878    EVT VT = N->getValueType(0);
2879    if (VT != MVT::i32)
2880      break;
2881    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2882      ? ARM::t2MOVTi16
2883      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2884    if (!Opc)
2885      break;
2886    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2887    N1C = dyn_cast<ConstantSDNode>(N1);
2888    if (!N1C)
2889      break;
2890    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2891      SDValue N2 = N0.getOperand(1);
2892      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2893      if (!N2C)
2894        break;
2895      unsigned N1CVal = N1C->getZExtValue();
2896      unsigned N2CVal = N2C->getZExtValue();
2897      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2898          (N1CVal & 0xffffU) == 0xffffU &&
2899          (N2CVal & 0xffffU) == 0x0U) {
2900        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2901                                                  dl, MVT::i32);
2902        SDValue Ops[] = { N0.getOperand(0), Imm16,
2903                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2905        return;
2906      }
2907    }
2908
2909    break;
2910  }
2911  case ARMISD::UMAAL: {
2912    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2913    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2914                      N->getOperand(2), N->getOperand(3),
2915                      getAL(CurDAG, dl),
2916                      CurDAG->getRegister(0, MVT::i32) };
2917    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2918    return;
2919  }
2920  case ARMISD::UMLAL:{
2921    if (Subtarget->isThumb()) {
2922      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2923                        N->getOperand(3), getAL(CurDAG, dl),
2924                        CurDAG->getRegister(0, MVT::i32)};
2925      ReplaceNode(
2926          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2927      return;
2928    }else{
2929      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2930                        N->getOperand(3), getAL(CurDAG, dl),
2931                        CurDAG->getRegister(0, MVT::i32),
2932                        CurDAG->getRegister(0, MVT::i32) };
2933      ReplaceNode(N, CurDAG->getMachineNode(
2934                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2935                         MVT::i32, MVT::i32, Ops));
2936      return;
2937    }
2938  }
2939  case ARMISD::SMLAL:{
2940    if (Subtarget->isThumb()) {
2941      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2942                        N->getOperand(3), getAL(CurDAG, dl),
2943                        CurDAG->getRegister(0, MVT::i32)};
2944      ReplaceNode(
2945          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2946      return;
2947    }else{
2948      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2949                        N->getOperand(3), getAL(CurDAG, dl),
2950                        CurDAG->getRegister(0, MVT::i32),
2951                        CurDAG->getRegister(0, MVT::i32) };
2952      ReplaceNode(N, CurDAG->getMachineNode(
2953                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2954                         MVT::i32, MVT::i32, Ops));
2955      return;
2956    }
2957  }
2958  case ARMISD::SUBE: {
2959    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2960      break;
2961    // Look for a pattern to match SMMLS
2962    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2963    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2964        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2965        !SDValue(N, 1).use_empty())
2966      break;
2967
2968    if (Subtarget->isThumb())
2969      assert(Subtarget->hasThumb2() &&
2970             "This pattern should not be generated for Thumb");
2971
2972    SDValue SmulLoHi = N->getOperand(1);
2973    SDValue Subc = N->getOperand(2);
2974    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2975
2976    if (!Zero || Zero->getZExtValue() != 0 ||
2977        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2978        N->getOperand(1) != SmulLoHi.getValue(1) ||
2979        N->getOperand(2) != Subc.getValue(1))
2980      break;
2981
2982    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2983    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2984                      N->getOperand(0), getAL(CurDAG, dl),
2985                      CurDAG->getRegister(0, MVT::i32) };
2986    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2987    return;
2988  }
2989  case ISD::LOAD: {
2990    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2991      if (tryT2IndexedLoad(N))
2992        return;
2993    } else if (Subtarget->isThumb()) {
2994      if (tryT1IndexedLoad(N))
2995        return;
2996    } else if (tryARMIndexedLoad(N))
2997      return;
2998    // Other cases are autogenerated.
2999    break;
3000  }
3001  case ARMISD::WLS: {
3002    SDValue Ops[] = { N->getOperand(1),   // Loop count
3003                      N->getOperand(2),   // Exit target
3004                      N->getOperand(0) };
3005    SDNode *LoopStart =
3006      CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
3007    ReplaceUses(N, LoopStart);
3008    CurDAG->RemoveDeadNode(N);
3009    return;
3010  }
3011  case ARMISD::BRCOND: {
3012    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3013    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3014    // Pattern complexity = 6  cost = 1  size = 0
3015
3016    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3017    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3018    // Pattern complexity = 6  cost = 1  size = 0
3019
3020    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3021    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3022    // Pattern complexity = 6  cost = 1  size = 0
3023
3024    unsigned Opc = Subtarget->isThumb() ?
3025      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3026    SDValue Chain = N->getOperand(0);
3027    SDValue N1 = N->getOperand(1);
3028    SDValue N2 = N->getOperand(2);
3029    SDValue N3 = N->getOperand(3);
3030    SDValue InFlag = N->getOperand(4);
3031    assert(N1.getOpcode() == ISD::BasicBlock);
3032    assert(N2.getOpcode() == ISD::Constant);
3033    assert(N3.getOpcode() == ISD::Register);
3034
3035    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3036
3037    if (InFlag.getOpcode() == ARMISD::CMPZ) {
3038      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3039        SDValue Int = InFlag.getOperand(0);
3040        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3041
3042        // Handle low-overhead loops.
3043        if (ID == Intrinsic::loop_decrement_reg) {
3044          SDValue Elements = Int.getOperand(2);
3045          SDValue Size = CurDAG->getTargetConstant(
3046            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3047                                 MVT::i32);
3048
3049          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3050          SDNode *LoopDec =
3051            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3052                                   CurDAG->getVTList(MVT::i32, MVT::Other),
3053                                   Args);
3054          ReplaceUses(Int.getNode(), LoopDec);
3055
3056          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3057          SDNode *LoopEnd =
3058            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3059
3060          ReplaceUses(N, LoopEnd);
3061          CurDAG->RemoveDeadNode(N);
3062          CurDAG->RemoveDeadNode(InFlag.getNode());
3063          CurDAG->RemoveDeadNode(Int.getNode());
3064          return;
3065        }
3066      }
3067
3068      bool SwitchEQNEToPLMI;
3069      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3070      InFlag = N->getOperand(4);
3071
3072      if (SwitchEQNEToPLMI) {
3073        switch ((ARMCC::CondCodes)CC) {
3074        default: llvm_unreachable("CMPZ must be either NE or EQ!");
3075        case ARMCC::NE:
3076          CC = (unsigned)ARMCC::MI;
3077          break;
3078        case ARMCC::EQ:
3079          CC = (unsigned)ARMCC::PL;
3080          break;
3081        }
3082      }
3083    }
3084
3085    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3086    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3087    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3088                                             MVT::Glue, Ops);
3089    Chain = SDValue(ResNode, 0);
3090    if (N->getNumValues() == 2) {
3091      InFlag = SDValue(ResNode, 1);
3092      ReplaceUses(SDValue(N, 1), InFlag);
3093    }
3094    ReplaceUses(SDValue(N, 0),
3095                SDValue(Chain.getNode(), Chain.getResNo()));
3096    CurDAG->RemoveDeadNode(N);
3097    return;
3098  }
3099
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    //   This allows us to avoid materializing the expensive negative constant.
    //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
    //   for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          // Thumb1: use the 3-bit immediate form when the addend fits,
          // otherwise the 8-bit form.
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        // Morph N in place (rather than creating a new node) so existing
        // glue users keep referring to it.
        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
3137
  case ARMISD::CMOV: {
    // Conditional move. Operand 4 is the flags (glue) input.
    SDValue InFlag = N->getOperand(4);

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      // SelectCMPZ may rewrite the compare; it reports via SwitchEQNEToPLMI
      // whether EQ/NE users of the flags must be remapped to PL/MI.
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
          (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        // Rewrite the CMOV in place with the adjusted condition code; all
        // other operands are carried over unchanged.
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }

    }
    // Other cases are autogenerated.
    break;
  }
3169
3170  case ARMISD::VZIP: {
3171    unsigned Opc = 0;
3172    EVT VT = N->getValueType(0);
3173    switch (VT.getSimpleVT().SimpleTy) {
3174    default: return;
3175    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3176    case MVT::v4f16:
3177    case MVT::v4i16: Opc = ARM::VZIPd16; break;
3178    case MVT::v2f32:
3179    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3180    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3181    case MVT::v16i8: Opc = ARM::VZIPq8; break;
3182    case MVT::v8f16:
3183    case MVT::v8i16: Opc = ARM::VZIPq16; break;
3184    case MVT::v4f32:
3185    case MVT::v4i32: Opc = ARM::VZIPq32; break;
3186    }
3187    SDValue Pred = getAL(CurDAG, dl);
3188    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3189    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3190    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3191    return;
3192  }
3193  case ARMISD::VUZP: {
3194    unsigned Opc = 0;
3195    EVT VT = N->getValueType(0);
3196    switch (VT.getSimpleVT().SimpleTy) {
3197    default: return;
3198    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3199    case MVT::v4f16:
3200    case MVT::v4i16: Opc = ARM::VUZPd16; break;
3201    case MVT::v2f32:
3202    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3203    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3204    case MVT::v16i8: Opc = ARM::VUZPq8; break;
3205    case MVT::v8f16:
3206    case MVT::v8i16: Opc = ARM::VUZPq16; break;
3207    case MVT::v4f32:
3208    case MVT::v4i32: Opc = ARM::VUZPq32; break;
3209    }
3210    SDValue Pred = getAL(CurDAG, dl);
3211    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3212    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3213    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3214    return;
3215  }
3216  case ARMISD::VTRN: {
3217    unsigned Opc = 0;
3218    EVT VT = N->getValueType(0);
3219    switch (VT.getSimpleVT().SimpleTy) {
3220    default: return;
3221    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3222    case MVT::v4f16:
3223    case MVT::v4i16: Opc = ARM::VTRNd16; break;
3224    case MVT::v2f32:
3225    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3226    case MVT::v16i8: Opc = ARM::VTRNq8; break;
3227    case MVT::v8f16:
3228    case MVT::v8i16: Opc = ARM::VTRNq16; break;
3229    case MVT::v4f32:
3230    case MVT::v4i32: Opc = ARM::VTRNq32; break;
3231    }
3232    SDValue Pred = getAL(CurDAG, dl);
3233    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3234    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3235    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3236    return;
3237  }
3238  case ARMISD::BUILD_VECTOR: {
3239    EVT VecVT = N->getValueType(0);
3240    EVT EltVT = VecVT.getVectorElementType();
3241    unsigned NumElts = VecVT.getVectorNumElements();
3242    if (EltVT == MVT::f64) {
3243      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3244      ReplaceNode(
3245          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3246      return;
3247    }
3248    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3249    if (NumElts == 2) {
3250      ReplaceNode(
3251          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3252      return;
3253    }
3254    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3255    ReplaceNode(N,
3256                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3257                                    N->getOperand(2), N->getOperand(3)));
3258    return;
3259  }
3260
  // NEON "load and duplicate to all lanes" (VLDnDUP) selection. Each case
  // provides opcode tables ordered by element size (8/16/32) and dispatches
  // to SelectVLDDup. The _UPD cases below select the writeback ("wb") forms;
  // note the second boolean argument flips to true for them.
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  // Writeback (post-incrementing address register) variants follow.
  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                        ARM::VLD2DUPd16wb_fixed,
                                        ARM::VLD2DUPd32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                        ARM::VLD3DUPd16Pseudo_UPD,
                                        ARM::VLD3DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                        ARM::VLD4DUPd16Pseudo_UPD,
                                        ARM::VLD4DUPd32Pseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
    return;
  }
3327
  // Writeback NEON structure loads (vldN with post-increment). Opcode tables
  // are ordered by element size (8/16/32, plus 64 where present). For vld3/
  // vld4 on Q registers the load is issued in two steps — QOpcodes0 then the
  // "odd" QOpcodes1 (see the opcode names). The *LN_UPD cases select the
  // single-lane ("to one lane") writeback forms.
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    // Note the 64-bit element slot falls back to a plain VLD1q64.
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // Lane loads have no 8-bit Q form, so the Q tables start at 16 bits.
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
3412
  // Writeback NEON structure stores (vstN with post-increment) — mirrors the
  // VLDn_UPD cases above: tables ordered by element size; vst3/vst4 on Q
  // registers are issued in two steps (QOpcodes0, then the "odd" QOpcodes1).
  // The *LN_UPD cases select the single-lane writeback store forms.
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    // Note the 64-bit element slot falls back to a plain VST1q64.
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // Lane stores have no 8-bit Q form, so the Q tables start at 16 bits.
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
3497
3498  case ISD::INTRINSIC_VOID:
3499  case ISD::INTRINSIC_W_CHAIN: {
3500    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3501    switch (IntNo) {
3502    default:
3503      break;
3504
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      // mrrc/mrrc2: move two words from a coprocessor into two core
      // registers (result types below are {i32, i32, Other}).
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
      // instruction will always be '1111' but it is possible in assembly language to specify
      // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      // 64-bit exclusive load: ldrexd, or ldaexd for the acquire variant.
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns a i64 value in {i32, i32}
      std::vector<EVT> ResTys;
      if (isThumb) {
        // Thumb: the instruction yields the two 32-bit halves directly.
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        // ARM: the result is a register pair, modelled as a single untyped
        // value; the halves are extracted via subregisters below.
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses. The chain result index differs between the two forms
      // (2 for Thumb, 1 for ARM).
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        // Result 0: first Thumb result, or gsub_0 extracted from the pair.
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        // Result 1: second Thumb result, or gsub_1 extracted from the pair.
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      // 64-bit exclusive store: strexd, or stlexd for the release variant.
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store exclusive double return a i32 value which is the return status
      // of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      // NOTE(review): the ldrexd case above gates Thumb selection on
      // hasV8MBaselineOps() while this uses hasThumb2() — confirm the
      // asymmetry is intentional.
      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        // Thumb takes the two halves as separate operands.
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }
3635
    // Non-writeback NEON load intrinsics (vld1..vld4, vld1xN, vldNdup,
    // vldNlane). Opcode tables are ordered by element size (8/16/32, plus 64
    // where present). Where two Q tables appear, the operation is issued in
    // two steps — QOpcodes0 then QOpcodes1 (the "odd"/"High" halves; see the
    // opcode names).
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      // Note the 64-bit element slot falls back to a plain VLD1q64.
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // Lane loads have no 8-bit Q form, so the Q tables start at 16 bits.
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }
3794
3795    case Intrinsic::arm_neon_vld4lane: {
3796      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3797                                           ARM::VLD4LNd16Pseudo,
3798                                           ARM::VLD4LNd32Pseudo };
3799      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3800                                           ARM::VLD4LNq32Pseudo };
3801      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3802      return;
3803    }
3804
3805    case Intrinsic::arm_neon_vst1: {
3806      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3807                                           ARM::VST1d32, ARM::VST1d64 };
3808      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3809                                           ARM::VST1q32, ARM::VST1q64 };
3810      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3811      return;
3812    }
3813
3814    case Intrinsic::arm_neon_vst1x2: {
3815      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3816                                           ARM::VST1q32, ARM::VST1q64 };
3817      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3818                                           ARM::VST1d16QPseudo,
3819                                           ARM::VST1d32QPseudo,
3820                                           ARM::VST1d64QPseudo };
3821      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3822      return;
3823    }
3824
3825    case Intrinsic::arm_neon_vst1x3: {
3826      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3827                                           ARM::VST1d16TPseudo,
3828                                           ARM::VST1d32TPseudo,
3829                                           ARM::VST1d64TPseudo };
3830      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3831                                            ARM::VST1q16LowTPseudo_UPD,
3832                                            ARM::VST1q32LowTPseudo_UPD,
3833                                            ARM::VST1q64LowTPseudo_UPD };
3834      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3835                                            ARM::VST1q16HighTPseudo,
3836                                            ARM::VST1q32HighTPseudo,
3837                                            ARM::VST1q64HighTPseudo };
3838      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3839      return;
3840    }
3841
3842    case Intrinsic::arm_neon_vst1x4: {
3843      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3844                                           ARM::VST1d16QPseudo,
3845                                           ARM::VST1d32QPseudo,
3846                                           ARM::VST1d64QPseudo };
3847      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3848                                            ARM::VST1q16LowQPseudo_UPD,
3849                                            ARM::VST1q32LowQPseudo_UPD,
3850                                            ARM::VST1q64LowQPseudo_UPD };
3851      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3852                                            ARM::VST1q16HighQPseudo,
3853                                            ARM::VST1q32HighQPseudo,
3854                                            ARM::VST1q64HighQPseudo };
3855      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3856      return;
3857    }
3858
3859    case Intrinsic::arm_neon_vst2: {
3860      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3861                                           ARM::VST2d32, ARM::VST1q64 };
3862      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3863                                           ARM::VST2q32Pseudo };
3864      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3865      return;
3866    }
3867
3868    case Intrinsic::arm_neon_vst3: {
3869      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3870                                           ARM::VST3d16Pseudo,
3871                                           ARM::VST3d32Pseudo,
3872                                           ARM::VST1d64TPseudo };
3873      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3874                                            ARM::VST3q16Pseudo_UPD,
3875                                            ARM::VST3q32Pseudo_UPD };
3876      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3877                                            ARM::VST3q16oddPseudo,
3878                                            ARM::VST3q32oddPseudo };
3879      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3880      return;
3881    }
3882
3883    case Intrinsic::arm_neon_vst4: {
3884      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3885                                           ARM::VST4d16Pseudo,
3886                                           ARM::VST4d32Pseudo,
3887                                           ARM::VST1d64QPseudo };
3888      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3889                                            ARM::VST4q16Pseudo_UPD,
3890                                            ARM::VST4q32Pseudo_UPD };
3891      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3892                                            ARM::VST4q16oddPseudo,
3893                                            ARM::VST4q32oddPseudo };
3894      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3895      return;
3896    }
3897
3898    case Intrinsic::arm_neon_vst2lane: {
3899      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3900                                           ARM::VST2LNd16Pseudo,
3901                                           ARM::VST2LNd32Pseudo };
3902      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3903                                           ARM::VST2LNq32Pseudo };
3904      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3905      return;
3906    }
3907
3908    case Intrinsic::arm_neon_vst3lane: {
3909      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3910                                           ARM::VST3LNd16Pseudo,
3911                                           ARM::VST3LNd32Pseudo };
3912      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3913                                           ARM::VST3LNq32Pseudo };
3914      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3915      return;
3916    }
3917
3918    case Intrinsic::arm_neon_vst4lane: {
3919      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3920                                           ARM::VST4LNd16Pseudo,
3921                                           ARM::VST4LNd32Pseudo };
3922      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3923                                           ARM::VST4LNq32Pseudo };
3924      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3925      return;
3926    }
3927    }
3928    break;
3929  }
3930
3931  case ISD::ATOMIC_CMP_SWAP:
3932    SelectCMP_SWAP(N);
3933    return;
3934  }
3935
3936  SelectCode(N);
3937}
3938
3939// Inspect a register string of the form
3940// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3941// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3942// and obtain the integer operands from them, adding these operands to the
3943// provided vector.
3944static void getIntOperandsFromRegisterString(StringRef RegString,
3945                                             SelectionDAG *CurDAG,
3946                                             const SDLoc &DL,
3947                                             std::vector<SDValue> &Ops) {
3948  SmallVector<StringRef, 5> Fields;
3949  RegString.split(Fields, ':');
3950
3951  if (Fields.size() > 1) {
3952    bool AllIntFields = true;
3953
3954    for (StringRef Field : Fields) {
3955      // Need to trim out leading 'cp' characters and get the integer field.
3956      unsigned IntField;
3957      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3958      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3959    }
3960
3961    assert(AllIntFields &&
3962            "Unexpected non-integer value in special register string.");
3963  }
3964}
3965
3966// Maps a Banked Register string to its mask value. The mask value returned is
3967// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3968// mask operand, which expresses which register is to be used, e.g. r8, and in
3969// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3970// was invalid.
3971static inline int getBankedRegisterMask(StringRef RegString) {
3972  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3973  if (!TheReg)
3974     return -1;
3975  return TheReg->Encoding;
3976}
3977
3978// The flags here are common to those allowed for apsr in the A class cores and
3979// those allowed for the special registers in the M class cores. Returns a
3980// value representing which flags were present, -1 if invalid.
3981static inline int getMClassFlagsMask(StringRef Flags) {
3982  return StringSwitch<int>(Flags)
3983          .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3984                         // correct when flags are not permitted
3985          .Case("g", 0x1)
3986          .Case("nzcvq", 0x2)
3987          .Case("nzcvqg", 0x3)
3988          .Default(-1);
3989}
3990
3991// Maps MClass special registers string to its value for use in the
3992// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3993// Returns -1 to signify that the string was invalid.
3994static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3995  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3996  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3997  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3998    return -1;
3999  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4000}
4001
4002static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4003  // The mask operand contains the special register (R Bit) in bit 4, whether
4004  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4005  // bits 3-0 contains the fields to be accessed in the special register, set by
4006  // the flags provided with the register.
4007  int Mask = 0;
4008  if (Reg == "apsr") {
4009    // The flags permitted for apsr are the same flags that are allowed in
4010    // M class registers. We get the flag value and then shift the flags into
4011    // the correct place to combine with the mask.
4012    Mask = getMClassFlagsMask(Flags);
4013    if (Mask == -1)
4014      return -1;
4015    return Mask << 2;
4016  }
4017
4018  if (Reg != "cpsr" && Reg != "spsr") {
4019    return -1;
4020  }
4021
4022  // This is the same as if the flags were "fc"
4023  if (Flags.empty() || Flags == "all")
4024    return Mask | 0x9;
4025
4026  // Inspect the supplied flags string and set the bits in the mask for
4027  // the relevant and valid flags allowed for cpsr and spsr.
4028  for (char Flag : Flags) {
4029    int FlagVal;
4030    switch (Flag) {
4031      case 'c':
4032        FlagVal = 0x1;
4033        break;
4034      case 'x':
4035        FlagVal = 0x2;
4036        break;
4037      case 's':
4038        FlagVal = 0x4;
4039        break;
4040      case 'f':
4041        FlagVal = 0x8;
4042        break;
4043      default:
4044        FlagVal = 0;
4045    }
4046
4047    // This avoids allowing strings where the same flag bit appears twice.
4048    if (!FlagVal || (Mask & FlagVal))
4049      return -1;
4050    Mask |= FlagVal;
4051  }
4052
4053  // If the register is spsr then we need to set the R bit.
4054  if (Reg == "spsr")
4055    Mask |= 0x10;
4056
4057  return Mask;
4058}
4059
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false to fall back to default
// selection.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 carries the metadata string naming the register to read.
  // NOTE(review): both dyn_cast results are used unchecked; this lowering
  // assumes the intrinsic always supplies an MDNodeSDNode wrapping an
  // MDString — confirm against the read_register lowering in the caller.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // If the string is of the ACLE coprocessor form, this fills Ops with the
  // parsed integer fields; otherwise Ops stays empty.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the predicate (AL), the predicate register placeholder, and the
    // incoming chain to complete the machine node's operand list.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // All remaining lookups are done on the lower-cased name.
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. r8_usr) lower to MRSbanked with the encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMRS)
                    .Case("fpexc", ARM::VMRS_FPEXC)
                    .Case("fpsid", ARM::VMRS_FPSID)
                    .Case("mvfr0", ARM::VMRS_MVFR0)
                    .Case("mvfr1", ARM::VMRS_MVFR1)
                    .Case("mvfr2", ARM::VMRS_MVFR2)
                    .Case("fpinst", ARM::VMRS_FPINST)
                    .Case("fpinst2", ARM::VMRS_FPINST2)
                    .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // VFP system registers require VFP2; mvfr2 additionally requires ARMv8.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: let default selection report the problem.
  return false;
}
4174
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns true if the node was replaced, false to fall back to default
// selection.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 carries the metadata string naming the register to write;
  // operand 2 (and operand 3 for 64-bit writes) carries the value(s).
  // NOTE(review): both dyn_cast results are used unchecked; this lowering
  // assumes the intrinsic always supplies an MDNodeSDNode wrapping an
  // MDString — confirm against the write_register lowering in the caller.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // If the string is of the ACLE coprocessor form, this fills Ops with the
  // parsed integer fields; otherwise Ops stays empty.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // The value to write goes after the first two coprocessor fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit writes carry the value as two i32 operands.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the predicate (AL), the predicate register placeholder, and the
    // incoming chain to complete the machine node's operand list.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // All remaining lookups are done on the lower-cased name.
  std::string SpecialReg = RegString->getString().lower();
  // Banked registers (e.g. r8_usr) lower to MSRbanked with the encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMSR)
                    .Case("fpexc", ARM::VMSR_FPEXC)
                    .Case("fpsid", ARM::VMSR_FPSID)
                    .Case("fpinst", ARM::VMSR_FPINST)
                    .Case("fpinst2", ARM::VMSR_FPINST2)
                    .Default(0);

  if (Opcode) {
    // VFP system registers require VFP2.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a name of the form "<reg>_<flags>" (e.g. "cpsr_fc") into the
  // register part and the flags part for the A/R-class mask lookup below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: let default selection report the problem.
  return false;
}
4278
// Rewrite an INLINEASM node so that 64-bit operands constrained to "r" use a
// single GPRPair virtual register instead of two separate GPRs. Returns true
// (and replaces N) only if at least one operand was rewritten; otherwise
// returns false and leaves N untouched.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // Tracks, per register operand group, whether it was rewritten to a
  // GPRPair; consulted when a later use is tied to an earlier def.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands (chain, asm string, etc.).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Each operand group is introduced by a constant flag word describing it.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register uses/defs/early-clobbers can be rewritten to a GPRPair.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Rewrite only two-register GPR groups (or uses tied to an operand that
    // was already rewritten, which carry no reg class constraint).
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user so its glue input comes from the new
      // copy chain instead of directly from the asm node.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // One GPRPair register now stands in for the two GPRs.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the INLINEASM node with the rewritten operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4442
4443
// Select the operand for an inline-asm memory constraint. Per the
// SelectionDAGISel convention, returns false on success (the selected
// operands are appended to OutOps) and true on failure.
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    LLVM_FALLTHROUGH;
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register.  That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  // Not reached: every case above returns and the default is unreachable;
  // kept to satisfy compilers that warn about missing return paths.
  return true;
}
4472
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
/// \param TM        The ARM target machine the pass will select for.
/// \param OptLevel  Codegen optimization level forwarded to the selector.
/// \returns A newly allocated pass; ownership passes to the caller.
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}
4480