AArch64ISelDAGToDAG.cpp revision 360660
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
        ForCodeSize(false) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize = MF.getFunction().hasOptSize();
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }


  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm receives the zero-extended value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so, Imm receives the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node has a specific
// opcode and an immediate integer right operand. If so, Imm receives the
// value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, so
    // we require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
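
// Worked examples (illustrative): 0xabc is selected as Val=0xabc with LSL #0;
// 0xabc000 as Val=0xabc with LSL #12; 0xabc0 and 0x1abc000 fit neither form
// and are rejected.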

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it is interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}
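
// For illustration, this is what allows (cmp w0, #-4095) to be selected as
// "cmn w0, #4095". The Immed == 0 bail-out above matters because "cmp wN, #0"
// and "cmn wN, #0" set the C flag differently.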

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (ForCodeSize || V.hasOneUse())
    return true;
  // If a subtarget has a fast-path LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand.  If the value
/// is not shifted, set the Shift operand to the default of "LSL 0".  The
/// logical instructions allow the shifted register to be rotated, but the
/// arithmetic instructions do not.  The AllowROR parameter specifies whether
/// ROR is supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}
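
// For example, (add x1, (shl x2, 3)) can be selected here as
// "add x0, x1, x2, lsl #3": Reg is x2 and Shift encodes LSL #3.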

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}
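
// For illustration: (and x0, 0xffff) is classified as UXTH, so an add of that
// value can be selected as "add x1, x2, w0, uxth". With IsLoadStore set, the
// 8- and 16-bit extends are rejected because the load/store register-offset
// addressing mode only offers 32-bit (UXTW/SXTW) extends.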

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}
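
// E.g. for a GPR64 value in x8, this emits an EXTRACT_SUBREG of sub_32 so
// that a folded extend such as "add x0, x1, w8, sxth" can name the 32-bit
// register w8.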


/// SelectArithExtendedRegister - Select an "extended register" operand.  This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead.
    if (Ext == AArch64_AM::UXTW &&
        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from.  Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program.  We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address.  The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // Unsigned immediate.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
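
// Worked example (illustrative): with IsSignedImm, BW == 7 and Size == 8
// (LDP/STP of X registers), Range is 64, so byte offsets that are multiples
// of 8 in [-512, 504] are accepted and encoded as OffImm = offset / 8.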

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address.  The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                              SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0) {
      const GlobalValue *GV = GAN->getGlobal();
      unsigned Alignment = GV->getAlignment();
      Type *Ty = GV->getValueType();
      if (Alignment == 0 && Ty->isSized())
        Alignment = DL.getABITypeAlignment(Ty);

      if (Alignment >= Size)
        return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
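
// Worked example (illustrative): for Size == 8 this accepts byte offsets
// 0..32760 in multiples of 8 (0xFFF << 3), so "ldr x0, [x1, #16]" is selected
// with Base = x1 and OffImm = 2.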

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address.  This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode.  The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}
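
// For example, a byte offset of #3 on an i64 load is not a multiple of 8, so
// the scaled mode rejects it; this mode accepts any offset in [-256, 255] and
// the access is selected as the unscaled "ldur x0, [x1, #3]".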

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or if it can be encoded in an "ADD LSL #12" and cannot
// be encoded by a single MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constants in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
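
// Worked examples (illustrative): 0xabc -> true (plain ADD immediate);
// 0x123000 -> true (encodable as ADD LSL #12 but not as a single MOVZ);
// 0xab0000 -> false, since "movz x0, #0xab, lsl #16" is one instruction.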

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. In that case [BaseReg + 0] would be used instead, generating
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // In such situations, the [BaseReg, XReg] addressing mode saves one ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip if the immediate can be selected by the load/store addressing
    // mode, or if it can be encoded by a single ADD (SUB is also checked by
    // using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}
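
// For example, createQTuple({q0, q1}) produces the equivalent of
//   REG_SEQUENCE QQRegClassID, q0, qsub0, q1, qsub1
// i.e. a single untyped value covering both vector registers.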

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
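
// For illustration: a pre-indexed i64 load is selected to "ldr x0, [x1, #8]!"
// (base updated before the access) and a post-indexed one to
// "ldr x0, [x1], #8" (base updated after). Result 0 of the machine node is
// the written-back base and result 1 the loaded value, which is why the
// replacements above swap the result numbers.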
1221
1222void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1223                                     unsigned SubRegIdx) {
1224  SDLoc dl(N);
1225  EVT VT = N->getValueType(0);
1226  SDValue Chain = N->getOperand(0);
1227
1228  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1229                   Chain};
1230
1231  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1232
1233  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1234  SDValue SuperReg = SDValue(Ld, 0);
1235  for (unsigned i = 0; i < NumVecs; ++i)
1236    ReplaceUses(SDValue(N, i),
1237        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1238
1239  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1240
1241  // Transfer memoperands.
1242  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1243  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1244
1245  CurDAG->RemoveDeadNode(N);
1246}
1247
1248void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1249                                         unsigned Opc, unsigned SubRegIdx) {
1250  SDLoc dl(N);
1251  EVT VT = N->getValueType(0);
1252  SDValue Chain = N->getOperand(0);
1253
1254  SDValue Ops[] = {N->getOperand(1), // Mem operand
1255                   N->getOperand(2), // Incremental
1256                   Chain};
1257
1258  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1259                        MVT::Untyped, MVT::Other};
1260
1261  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1262
1263  // Update uses of write back register
1264  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1265
1266  // Update uses of vector list
1267  SDValue SuperReg = SDValue(Ld, 1);
1268  if (NumVecs == 1)
1269    ReplaceUses(SDValue(N, 0), SuperReg);
1270  else
1271    for (unsigned i = 0; i < NumVecs; ++i)
1272      ReplaceUses(SDValue(N, i),
1273          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1274
1275  // Update the chain
1276  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1277  CurDAG->RemoveDeadNode(N);
1278}
1279
1280void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1281                                      unsigned Opc) {
1282  SDLoc dl(N);
1283  EVT VT = N->getOperand(2)->getValueType(0);
1284
1285  // Form a REG_SEQUENCE to force register allocation.
1286  bool Is128Bit = VT.getSizeInBits() == 128;
1287  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1288  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1289
1290  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1291  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1292
1293  // Transfer memoperands.
1294  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1295  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1296
1297  ReplaceNode(N, St);
1298}
1299
1300void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1301                                          unsigned Opc) {
1302  SDLoc dl(N);
1303  EVT VT = N->getOperand(2)->getValueType(0);
1304  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
1305                        MVT::Other}; // Type for the Chain
1306
1307  // Form a REG_SEQUENCE to force register allocation.
1308  bool Is128Bit = VT.getSizeInBits() == 128;
1309  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1310  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1311
1312  SDValue Ops[] = {RegSeq,
1313                   N->getOperand(NumVecs + 1), // base register
1314                   N->getOperand(NumVecs + 2), // Incremental
1315                   N->getOperand(0)};          // Chain
1316  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1317
1318  ReplaceNode(N, St);
1319}
1320
1321namespace {
1322/// WidenVector - Given a value in the V64 register class, produce the
1323/// equivalent value in the V128 register class.
1324class WidenVector {
1325  SelectionDAG &DAG;
1326
1327public:
1328  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1329
1330  SDValue operator()(SDValue V64Reg) {
1331    EVT VT = V64Reg.getValueType();
1332    unsigned NarrowSize = VT.getVectorNumElements();
1333    MVT EltTy = VT.getVectorElementType().getSimpleVT();
1334    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1335    SDLoc DL(V64Reg);
1336
1337    SDValue Undef =
1338        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1339    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1340  }
1341};
1342} // namespace
1343
1344/// NarrowVector - Given a value in the V128 register class, produce the
1345/// equivalent value in the V64 register class.
1346static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1347  EVT VT = V128Reg.getValueType();
1348  unsigned WideSize = VT.getVectorNumElements();
1349  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1350  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1351
1352  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1353                                    V128Reg);
1354}
1355
1356void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1357                                         unsigned Opc) {
1358  SDLoc dl(N);
1359  EVT VT = N->getValueType(0);
1360  bool Narrow = VT.getSizeInBits() == 64;
1361
1362  // Form a REG_SEQUENCE to force register allocation.
1363  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1364
1365  if (Narrow)
1366    transform(Regs, Regs.begin(),
1367                   WidenVector(*CurDAG));
1368
1369  SDValue RegSeq = createQTuple(Regs);
1370
1371  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1372
1373  unsigned LaneNo =
1374      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1375
1376  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1377                   N->getOperand(NumVecs + 3), N->getOperand(0)};
1378  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1379  SDValue SuperReg = SDValue(Ld, 0);
1380
1381  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1382  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1383                                    AArch64::qsub2, AArch64::qsub3 };
1384  for (unsigned i = 0; i < NumVecs; ++i) {
1385    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1386    if (Narrow)
1387      NV = NarrowVector(NV, *CurDAG);
1388    ReplaceUses(SDValue(N, i), NV);
1389  }
1390
1391  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1392  CurDAG->RemoveDeadNode(N);
1393}
1394
1395void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1396                                             unsigned Opc) {
1397  SDLoc dl(N);
1398  EVT VT = N->getValueType(0);
1399  bool Narrow = VT.getSizeInBits() == 64;
1400
1401  // Form a REG_SEQUENCE to force register allocation.
1402  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1403
1404  if (Narrow)
1405    transform(Regs, Regs.begin(),
1406              WidenVector(*CurDAG));
1407
1408  SDValue RegSeq = createQTuple(Regs);
1409
1410  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1411                        RegSeq->getValueType(0), MVT::Other};
1412
1413  unsigned LaneNo =
1414      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1415
1416  SDValue Ops[] = {RegSeq,
1417                   CurDAG->getTargetConstant(LaneNo, dl,
1418                                             MVT::i64),         // Lane Number
1419                   N->getOperand(NumVecs + 2),                  // Base register
1420                   N->getOperand(NumVecs + 3),                  // Incremental
1421                   N->getOperand(0)};
1422  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1423
1424  // Update uses of the write back register
1425  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1426
1427  // Update uses of the vector list
1428  SDValue SuperReg = SDValue(Ld, 1);
1429  if (NumVecs == 1) {
1430    ReplaceUses(SDValue(N, 0),
1431                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1432  } else {
1433    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1434    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1435                                      AArch64::qsub2, AArch64::qsub3 };
1436    for (unsigned i = 0; i < NumVecs; ++i) {
1437      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1438                                                  SuperReg);
1439      if (Narrow)
1440        NV = NarrowVector(NV, *CurDAG);
1441      ReplaceUses(SDValue(N, i), NV);
1442    }
1443  }
1444
1445  // Update the Chain
1446  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1447  CurDAG->RemoveDeadNode(N);
1448}
1449
1450void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1451                                          unsigned Opc) {
1452  SDLoc dl(N);
1453  EVT VT = N->getOperand(2)->getValueType(0);
1454  bool Narrow = VT.getSizeInBits() == 64;
1455
1456  // Form a REG_SEQUENCE to force register allocation.
1457  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1458
1459  if (Narrow)
1460    transform(Regs, Regs.begin(),
1461              WidenVector(*CurDAG));
1462
1463  SDValue RegSeq = createQTuple(Regs);
1464
1465  unsigned LaneNo =
1466      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1467
1468  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1469                   N->getOperand(NumVecs + 3), N->getOperand(0)};
1470  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1471
1472  // Transfer memoperands.
1473  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1474  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1475
1476  ReplaceNode(N, St);
1477}
1478
1479void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1480                                              unsigned Opc) {
1481  SDLoc dl(N);
1482  EVT VT = N->getOperand(2)->getValueType(0);
1483  bool Narrow = VT.getSizeInBits() == 64;
1484
1485  // Form a REG_SEQUENCE to force register allocation.
1486  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1487
1488  if (Narrow)
1489    transform(Regs, Regs.begin(),
1490              WidenVector(*CurDAG));
1491
1492  SDValue RegSeq = createQTuple(Regs);
1493
1494  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1495                        MVT::Other};
1496
1497  unsigned LaneNo =
1498      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1499
1500  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1501                   N->getOperand(NumVecs + 2), // Base Register
1502                   N->getOperand(NumVecs + 3), // Incremental
1503                   N->getOperand(0)};
1504  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1505
1506  // Transfer memoperands.
1507  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1508  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1509
1510  ReplaceNode(N, St);
1511}
1512
1513static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1514                                       unsigned &Opc, SDValue &Opd0,
1515                                       unsigned &LSB, unsigned &MSB,
1516                                       unsigned NumberOfIgnoredLowBits,
1517                                       bool BiggerPattern) {
1518  assert(N->getOpcode() == ISD::AND &&
1519         "N must be an AND operation to call this function");
1520
1521  EVT VT = N->getValueType(0);
1522
1523  // We could test the type of VT here and return false when it does not
1524  // match, but since that check is done before this call in the current
1525  // context, we turned it into an assert to avoid redundant code.
1526  assert((VT == MVT::i32 || VT == MVT::i64) &&
1527         "Type checking must have been done before calling this function");
1528
1529  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1530  // changed the AND node to a 32-bit mask operation. We'll have to
1531  // undo that as part of the transform here if we want to catch all
1532  // the opportunities.
1533  // Currently the NumberOfIgnoredLowBits argument helps to recover
1534  // from these situations when matching a bigger pattern (bitfield insert).
1535
1536  // For unsigned extracts, check for a shift right and mask
1537  uint64_t AndImm = 0;
1538  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1539    return false;
1540
1541  const SDNode *Op0 = N->getOperand(0).getNode();
1542
1543  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1544  // simplified. Try to undo that.
1545  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1546
1547  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
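  // (An illustrative check with assumed values: 0x0f & 0x10 == 0, so 0x0f is
  // accepted as a low-bit mask, while 0x18 & 0x19 == 0x18 != 0, so 0x18 is
  // rejected.)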
1548  if (AndImm & (AndImm + 1))
1549    return false;
1550
1551  bool ClampMSB = false;
1552  uint64_t SrlImm = 0;
1553  // Handle the SRL + ANY_EXTEND case.
1554  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1555      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1556    // Extend the incoming operand of the SRL to 64-bit.
1557    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1558    // Make sure to clamp the MSB so that we preserve the semantics of the
1559    // original operations.
1560    ClampMSB = true;
1561  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1562             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1563                                   SrlImm)) {
1564    // If the shift result was truncated, we can still combine them.
1565    Opd0 = Op0->getOperand(0).getOperand(0);
1566
1567    // Use the type of SRL node.
1568    VT = Opd0->getValueType(0);
1569  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1570    Opd0 = Op0->getOperand(0);
1571  } else if (BiggerPattern) {
1572    // Let's pretend a 0 shift right has been performed.
1573    // The resulting code will be at least as good as the original one,
1574    // and it may expose more opportunities for the bitfield insert pattern.
1575    // FIXME: Currently we limit this to the bigger pattern, because
1576    // some optimizations expect AND and not UBFM.
1577    Opd0 = N->getOperand(0);
1578  } else
1579    return false;
1580
1581  // Bail out on large immediates. This happens when no proper
1582  // combining/constant folding was performed.
1583  if (!BiggerPattern && (SrlImm == 0 || SrlImm >= VT.getSizeInBits())) {
1584    LLVM_DEBUG(
1585        (dbgs() << N
1586                << ": Found large shift immediate, this should not happen\n"));
1587    return false;
1588  }
1589
1590  LSB = SrlImm;
1591  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1592                                 : countTrailingOnes<uint64_t>(AndImm)) -
1593        1;
1594  if (ClampMSB)
1595    // Since we're moving the extend before the right shift operation, we need
1596    // to clamp the MSB to make sure we don't shift in undefined bits instead of
1597    // the zeros which would get shifted in with the original right shift
1598    // operation.
1599    MSB = MSB > 31 ? 31 : MSB;
1600
1601  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1602  return true;
1603}
1604
1605static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1606                                             SDValue &Opd0, unsigned &Immr,
1607                                             unsigned &Imms) {
1608  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1609
1610  EVT VT = N->getValueType(0);
1611  unsigned BitWidth = VT.getSizeInBits();
1612  assert((VT == MVT::i32 || VT == MVT::i64) &&
1613         "Type checking must have been done before calling this function");
1614
1615  SDValue Op = N->getOperand(0);
1616  if (Op->getOpcode() == ISD::TRUNCATE) {
1617    Op = Op->getOperand(0);
1618    VT = Op->getValueType(0);
1619    BitWidth = VT.getSizeInBits();
1620  }
1621
1622  uint64_t ShiftImm;
1623  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1624      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1625    return false;
1626
1627  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1628  if (ShiftImm + Width > BitWidth)
1629    return false;
1630
1631  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1632  Opd0 = Op.getOperand(0);
1633  Immr = ShiftImm;
1634  Imms = ShiftImm + Width - 1;
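  // An illustrative worked example (assumed values, not from a test case):
  // for sign_extend_inreg (srl X, #4), i8 on i32, Immr = 4 and
  // Imms = 4 + 8 - 1 = 11, i.e. SBFMWri X, #4, #11 (sbfx w, w, #4, #8).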
1635  return true;
1636}
1637
1638static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1639                                          SDValue &Opd0, unsigned &LSB,
1640                                          unsigned &MSB) {
1641  // We are looking for the following pattern, which extracts a contiguous
1642  // run of bits from the source value and places it at the LSB of the
1643  // destination value; all other bits of the destination are set to zero:
1644  //
1645  // Value2 = AND Value, MaskImm
1646  // SRL Value2, ShiftImm
1647  //
1648  // where MaskImm >> ShiftImm gives the width of the field to extract.
1649  //
1650  // This gets selected into a single UBFM:
1651  //
1652  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1653  //
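  // An illustrative instance (assumed values, not from the original code):
  // Value2 = AND Value, 0xf8 followed by SRL Value2, 3 gives
  // MaskImm >> ShiftImm = 0xf8 >> 3 = 0x1f, so BitWide = 5, and this selects
  // to UBFM Value, 3, 3 + 5 - 1, i.e. UBFM Value, 3, 7 (ubfx #3, #5).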
1654
1655  if (N->getOpcode() != ISD::SRL)
1656    return false;
1657
1658  uint64_t AndMask = 0;
1659  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1660    return false;
1661
1662  Opd0 = N->getOperand(0).getOperand(0);
1663
1664  uint64_t SrlImm = 0;
1665  if (!isIntImmediate(N->getOperand(1), SrlImm))
1666    return false;
1667
1668  // Check whether we really have several bits extract here.
1669  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1670  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1671    if (N->getValueType(0) == MVT::i32)
1672      Opc = AArch64::UBFMWri;
1673    else
1674      Opc = AArch64::UBFMXri;
1675
1676    LSB = SrlImm;
1677    MSB = BitWide + SrlImm - 1;
1678    return true;
1679  }
1680
1681  return false;
1682}
1683
1684static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1685                                       unsigned &Immr, unsigned &Imms,
1686                                       bool BiggerPattern) {
1687  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1688         "N must be a SHR/SRA operation to call this function");
1689
1690  EVT VT = N->getValueType(0);
1691
1692  // We could test the type of VT here and return false when it does not
1693  // match, but since that check is done before this call in the current
1694  // context, we turned it into an assert to avoid redundant code.
1695  assert((VT == MVT::i32 || VT == MVT::i64) &&
1696         "Type checking must have been done before calling this function");
1697
1698  // Check for AND + SRL doing several bits extract.
1699  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1700    return true;
1701
1702  // We're looking for a shift of a shift.
1703  uint64_t ShlImm = 0;
1704  uint64_t TruncBits = 0;
1705  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1706    Opd0 = N->getOperand(0).getOperand(0);
1707  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1708             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1709    // We are looking for a shift of a truncate. A truncate from i64 to i32
1710    // can be treated as zeroing the high 32 bits. Our strategy here is to
1711    // always generate a 64-bit UBFM. This consistency helps the later CSE
1712    // pass find more redundancy.
1713    Opd0 = N->getOperand(0).getOperand(0);
1714    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1715    VT = Opd0.getValueType();
1716    assert(VT == MVT::i64 && "the promoted type should be i64");
1717  } else if (BiggerPattern) {
1718    // Let's pretend a 0 shift left has been performed.
1719    // FIXME: Currently we limit this to the bigger pattern case,
1720    // because some optimizations expect AND and not UBFM
1721    Opd0 = N->getOperand(0);
1722  } else
1723    return false;
1724
1725  // Missing combines/constant folding may have left us with strange
1726  // constants.
1727  if (ShlImm >= VT.getSizeInBits()) {
1728    LLVM_DEBUG(
1729        (dbgs() << N
1730                << ": Found large shift immediate, this should not happen\n"));
1731    return false;
1732  }
1733
1734  uint64_t SrlImm = 0;
1735  if (!isIntImmediate(N->getOperand(1), SrlImm))
1736    return false;
1737
1738  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1739         "bad amount in shift node!");
1740  int immr = SrlImm - ShlImm;
1741  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1742  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
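  // An illustrative example (assumed values): for i32 (srl (shl X, 4), 8),
  // Immr = 8 - 4 = 4 and Imms = 32 - 4 - 0 - 1 = 27, which selects to
  // UBFMWri X, 4, 27, i.e. ubfx w, w, #4, #24.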
1743  // SRA requires a signed extraction
1744  if (VT == MVT::i32)
1745    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1746  else
1747    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1748  return true;
1749}
1750
1751bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1752  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1753
1754  EVT VT = N->getValueType(0);
1755  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1756  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1757    return false;
1758
1759  uint64_t ShiftImm;
1760  SDValue Op = N->getOperand(0);
1761  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1762    return false;
1763
1764  SDLoc dl(N);
1765  // Extend the incoming operand of the shift to 64-bits.
1766  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1767  unsigned Immr = ShiftImm;
1768  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1769  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1770                   CurDAG->getTargetConstant(Imms, dl, VT)};
1771  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1772  return true;
1773}
1774
1775static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1776                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1777                                unsigned NumberOfIgnoredLowBits = 0,
1778                                bool BiggerPattern = false) {
1779  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1780    return false;
1781
1782  switch (N->getOpcode()) {
1783  default:
1784    if (!N->isMachineOpcode())
1785      return false;
1786    break;
1787  case ISD::AND:
1788    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1789                                      NumberOfIgnoredLowBits, BiggerPattern);
1790  case ISD::SRL:
1791  case ISD::SRA:
1792    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1793
1794  case ISD::SIGN_EXTEND_INREG:
1795    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1796  }
1797
1798  unsigned NOpc = N->getMachineOpcode();
1799  switch (NOpc) {
1800  default:
1801    return false;
1802  case AArch64::SBFMWri:
1803  case AArch64::UBFMWri:
1804  case AArch64::SBFMXri:
1805  case AArch64::UBFMXri:
1806    Opc = NOpc;
1807    Opd0 = N->getOperand(0);
1808    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1809    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1810    return true;
1811  }
1812  // Unreachable
1813  return false;
1814}
1815
1816bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1817  unsigned Opc, Immr, Imms;
1818  SDValue Opd0;
1819  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1820    return false;
1821
1822  EVT VT = N->getValueType(0);
1823  SDLoc dl(N);
1824
1825  // If the bit extract operation is 64bit but the original type is 32bit, we
1826  // need to add one EXTRACT_SUBREG.
1827  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1828    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1829                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1830
1831    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1832    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1833    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1834                                          MVT::i32, SDValue(BFM, 0), SubReg));
1835    return true;
1836  }
1837
1838  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1839                   CurDAG->getTargetConstant(Imms, dl, VT)};
1840  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1841  return true;
1842}
1843
1844/// Does DstMask form a complementary pair with the mask provided by
1845/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
1846/// this asks whether DstMask zeroes precisely those bits that will be set by
1847/// the other half.
1848static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1849                              unsigned NumberOfIgnoredHighBits, EVT VT) {
1850  assert((VT == MVT::i32 || VT == MVT::i64) &&
1851         "i32 or i64 mask type expected!");
1852  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1853
1854  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1855  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1856
1857  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1858         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1859}
1860
1861// Look for bits that will be useful for later uses.
1862// A bit is considered useless as soon as it is dropped and never used
1863// again before that point.
1864// E.g., looking for the useful bits of x:
1865// 1. y = x & 0x7
1866// 2. z = y >> 2
1867// After #1, the useful bits of x are 0x7; those useful bits of x live
1868// through y.
1869// After #2, the useful bits of x are 0x4.
1870// However, if x is used by an unpredictable instruction, then all its bits
1871// are useful.
1872// E.g.
1873// 1. y = x & 0x7
1874// 2. z = y >> 2
1875// 3. str x, [@x]
1876static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1877
1878static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1879                                              unsigned Depth) {
1880  uint64_t Imm =
1881      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1882  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1883  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1884  getUsefulBits(Op, UsefulBits, Depth + 1);
1885}
1886
1887static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1888                                             uint64_t Imm, uint64_t MSB,
1889                                             unsigned Depth) {
1890  // Inherit the bit width from UsefulBits.
1891  APInt OpUsefulBits(UsefulBits);
1892  OpUsefulBits = 1;
1893
1894  if (MSB >= Imm) {
1895    OpUsefulBits <<= MSB - Imm + 1;
1896    --OpUsefulBits;
1897    // The interesting part will be in the lower part of the result
1898    getUsefulBits(Op, OpUsefulBits, Depth + 1);
1899    // The interesting part was starting at Imm in the argument
1900    OpUsefulBits <<= Imm;
1901  } else {
1902    OpUsefulBits <<= MSB + 1;
1903    --OpUsefulBits;
1904    // The interesting part will be shifted in the result
1905    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1906    getUsefulBits(Op, OpUsefulBits, Depth + 1);
1907    // The interesting part was at zero in the argument
1908    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1909  }
1910
1911  UsefulBits &= OpUsefulBits;
1912}
1913
1914static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1915                                  unsigned Depth) {
1916  uint64_t Imm =
1917      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1918  uint64_t MSB =
1919      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1920
1921  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1922}
1923
1924static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1925                                              unsigned Depth) {
1926  uint64_t ShiftTypeAndValue =
1927      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1928  APInt Mask(UsefulBits);
1929  Mask.clearAllBits();
1930  Mask.flipAllBits();
1931
1932  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1933    // Shift Left
1934    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1935    Mask <<= ShiftAmt;
1936    getUsefulBits(Op, Mask, Depth + 1);
1937    Mask.lshrInPlace(ShiftAmt);
1938  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1939    // Shift Right
1940    // We do not handle AArch64_AM::ASR, because the sign will change the
1941    // number of useful bits
1942    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1943    Mask.lshrInPlace(ShiftAmt);
1944    getUsefulBits(Op, Mask, Depth + 1);
1945    Mask <<= ShiftAmt;
1946  } else
1947    return;
1948
1949  UsefulBits &= Mask;
1950}
1951
1952static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1953                                 unsigned Depth) {
1954  uint64_t Imm =
1955      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1956  uint64_t MSB =
1957      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1958
1959  APInt OpUsefulBits(UsefulBits);
1960  OpUsefulBits = 1;
1961
1962  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1963  ResultUsefulBits.flipAllBits();
1964  APInt Mask(UsefulBits.getBitWidth(), 0);
1965
1966  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1967
1968  if (MSB >= Imm) {
1969    // The instruction is a BFXIL.
1970    uint64_t Width = MSB - Imm + 1;
1971    uint64_t LSB = Imm;
1972
1973    OpUsefulBits <<= Width;
1974    --OpUsefulBits;
1975
1976    if (Op.getOperand(1) == Orig) {
1977      // Copy the low bits from the result to bits starting from LSB.
1978      Mask = ResultUsefulBits & OpUsefulBits;
1979      Mask <<= LSB;
1980    }
1981
1982    if (Op.getOperand(0) == Orig)
1983      // Bits starting from LSB in the input contribute to the result.
1984      Mask |= (ResultUsefulBits & ~OpUsefulBits);
1985  } else {
1986    // The instruction is a BFI.
1987    uint64_t Width = MSB + 1;
1988    uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1989
1990    OpUsefulBits <<= Width;
1991    --OpUsefulBits;
1992    OpUsefulBits <<= LSB;
1993
1994    if (Op.getOperand(1) == Orig) {
1995      // Copy the bits from the result to the zero bits.
1996      Mask = ResultUsefulBits & OpUsefulBits;
1997      Mask.lshrInPlace(LSB);
1998    }
1999
2000    if (Op.getOperand(0) == Orig)
2001      Mask |= (ResultUsefulBits & ~OpUsefulBits);
2002  }
2003
2004  UsefulBits &= Mask;
2005}
2006
2007static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2008                                SDValue Orig, unsigned Depth) {
2009
2010  // Users of this node should have already been instruction selected
2011  // FIXME: Can we turn that into an assert?
2012  if (!UserNode->isMachineOpcode())
2013    return;
2014
2015  switch (UserNode->getMachineOpcode()) {
2016  default:
2017    return;
2018  case AArch64::ANDSWri:
2019  case AArch64::ANDSXri:
2020  case AArch64::ANDWri:
2021  case AArch64::ANDXri:
2022    // We increment Depth only when we call getUsefulBits.
2023    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2024                                             Depth);
2025  case AArch64::UBFMWri:
2026  case AArch64::UBFMXri:
2027    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2028
2029  case AArch64::ORRWrs:
2030  case AArch64::ORRXrs:
2031    if (UserNode->getOperand(1) != Orig)
2032      return;
2033    return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2034                                             Depth);
2035  case AArch64::BFMWri:
2036  case AArch64::BFMXri:
2037    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2038
2039  case AArch64::STRBBui:
2040  case AArch64::STURBBi:
2041    if (UserNode->getOperand(0) != Orig)
2042      return;
2043    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2044    return;
2045
2046  case AArch64::STRHHui:
2047  case AArch64::STURHHi:
2048    if (UserNode->getOperand(0) != Orig)
2049      return;
2050    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2051    return;
2052  }
2053}
2054
2055static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2056  if (Depth >= 6)
2057    return;
2058  // Initialize UsefulBits
2059  if (!Depth) {
2060    unsigned Bitwidth = Op.getScalarValueSizeInBits();
2061    // At the beginning, assume every produced bit is useful
2062    UsefulBits = APInt(Bitwidth, 0);
2063    UsefulBits.flipAllBits();
2064  }
2065  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2066
2067  for (SDNode *Node : Op.getNode()->uses()) {
2068    // A use cannot produce useful bits
2069    APInt UsefulBitsForUse = APInt(UsefulBits);
2070    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2071    UsersUsefulBits |= UsefulBitsForUse;
2072  }
2073  // UsefulBits contains the produced bits that are meaningful for the
2074  // current definition, thus a user cannot make a bit meaningful at
2075  // this point
2076  UsefulBits &= UsersUsefulBits;
2077}
2078
2079/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2080/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2081/// 0, return Op unchanged.
2082static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2083  if (ShlAmount == 0)
2084    return Op;
2085
2086  EVT VT = Op.getValueType();
2087  SDLoc dl(Op);
2088  unsigned BitWidth = VT.getSizeInBits();
2089  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2090
2091  SDNode *ShiftNode;
2092  if (ShlAmount > 0) {
2093    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
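    // (For example, with a hypothetical Amt of 4 on a 32-bit value this is
    // UBFM wD, wN, #28, #27.)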
2094    ShiftNode = CurDAG->getMachineNode(
2095        UBFMOpc, dl, VT, Op,
2096        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2097        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2098  } else {
2099    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2100    assert(ShlAmount < 0 && "expected right shift");
2101    int ShrAmount = -ShlAmount;
2102    ShiftNode = CurDAG->getMachineNode(
2103        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2104        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2105  }
2106
2107  return SDValue(ShiftNode, 0);
2108}
2109
2110/// Does this tree qualify as an attempt to move a bitfield into position,
2111/// essentially "(and (shl VAL, N), Mask)".
2112static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2113                                    bool BiggerPattern,
2114                                    SDValue &Src, int &ShiftAmount,
2115                                    int &MaskWidth) {
2116  EVT VT = Op.getValueType();
2117  unsigned BitWidth = VT.getSizeInBits();
2118  (void)BitWidth;
2119  assert(BitWidth == 32 || BitWidth == 64);
2120
2121  KnownBits Known = CurDAG->computeKnownBits(Op);
2122
2123  // Non-zero in the sense that they're not provably zero, which is the key
2124  // point if we want to use this value
2125  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2126
2127  // Discard a constant AND mask if present. It's safe because the node will
2128  // already have been factored into the computeKnownBits calculation above.
2129  uint64_t AndImm;
2130  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2131    assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2132    Op = Op.getOperand(0);
2133  }
2134
2135  // Don't match if the SHL has more than one use, since then we'll end up
2136  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2137  if (!BiggerPattern && !Op.hasOneUse())
2138    return false;
2139
2140  uint64_t ShlImm;
2141  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2142    return false;
2143  Op = Op.getOperand(0);
2144
2145  if (!isShiftedMask_64(NonZeroBits))
2146    return false;
2147
2148  ShiftAmount = countTrailingZeros(NonZeroBits);
2149  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2150
2151  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2152  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2153  // amount.  BiggerPattern is true when this pattern is being matched for BFI,
2154  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2155  // which case it is not profitable to insert an extra shift.
2156  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2157    return false;
2158  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2159
2160  return true;
2161}
2162
2163static bool isShiftedMask(uint64_t Mask, EVT VT) {
2164  assert(VT == MVT::i32 || VT == MVT::i64);
2165  if (VT == MVT::i32)
2166    return isShiftedMask_32(Mask);
2167  return isShiftedMask_64(Mask);
2168}
2169
2170// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2171// inserted only sets known zero bits.
2172static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2173  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2174
2175  EVT VT = N->getValueType(0);
2176  if (VT != MVT::i32 && VT != MVT::i64)
2177    return false;
2178
2179  unsigned BitWidth = VT.getSizeInBits();
2180
2181  uint64_t OrImm;
2182  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2183    return false;
2184
2185  // Skip this transformation if the immediate can be encoded directly in an
2186  // ORR. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most
2187  // likely performance neutral.
2188  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2189    return false;
2190
2191  uint64_t MaskImm;
2192  SDValue And = N->getOperand(0);
2193  // Must be a single use AND with an immediate operand.
2194  if (!And.hasOneUse() ||
2195      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2196    return false;
2197
2198  // Compute the Known Zero for the AND as this allows us to catch more general
2199  // cases than just looking for AND with imm.
2200  KnownBits Known = CurDAG->computeKnownBits(And);
2201
2202  // Non-zero in the sense that they're not provably zero, which is the key
2203  // point if we want to use this value.
2204  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2205
2206  // The KnownZero mask must be a shifted mask (e.g., 0011..1100, 11100..00).
2207  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2208    return false;
2209
2210  // The bits being inserted must only set those bits that are known to be zero.
2211  if ((OrImm & NotKnownZero) != 0) {
2212    // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2213    // currently handle this case.
2214    return false;
2215  }
2216
2217  // BFI/BFXIL dst, src, #lsb, #width.
2218  int LSB = countTrailingOnes(NotKnownZero);
2219  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2220
2221  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2222  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2223  unsigned ImmS = Width - 1;
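  // An illustrative example (assumed values): on i32, inserting a 28-bit
  // field at LSB = 4 gives ImmR = (32 - 4) % 32 = 28 and ImmS = 27, the BFM
  // form of BFI dst, src, #4, #28.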
2224
2225  // If we're creating a BFI instruction avoid cases where we need more
2226  // instructions to materialize the BFI constant as compared to the original
2227  // ORR.  A BFXIL will use the same constant as the original ORR, so the code
2228  // should be no worse in this case.
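  // (For instance, an assumed example: with OrImm = 0x000a0000 inserted at
  // LSB = 16, BFIImm is 0xa; both immediates need a single 16-bit chunk to
  // materialize, so the BFI is still emitted.)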
2229  bool IsBFI = LSB != 0;
2230  uint64_t BFIImm = OrImm >> LSB;
2231  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2232    // We have a BFI instruction and we know the constant can't be materialized
2233    // with a ORR-immediate with the zero register.
2234    unsigned OrChunks = 0, BFIChunks = 0;
2235    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2236      if (((OrImm >> Shift) & 0xFFFF) != 0)
2237        ++OrChunks;
2238      if (((BFIImm >> Shift) & 0xFFFF) != 0)
2239        ++BFIChunks;
2240    }
2241    if (BFIChunks > OrChunks)
2242      return false;
2243  }
2244
2245  // Materialize the constant to be inserted.
2246  SDLoc DL(N);
2247  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2248  SDNode *MOVI = CurDAG->getMachineNode(
2249      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2250
2251  // Create the BFI/BFXIL instruction.
2252  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2253                   CurDAG->getTargetConstant(ImmR, DL, VT),
2254                   CurDAG->getTargetConstant(ImmS, DL, VT)};
2255  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2256  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2257  return true;
2258}
2259
2260static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2261                                      SelectionDAG *CurDAG) {
2262  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2263
2264  EVT VT = N->getValueType(0);
2265  if (VT != MVT::i32 && VT != MVT::i64)
2266    return false;
2267
2268  unsigned BitWidth = VT.getSizeInBits();
2269
2270  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2271  // have the expected shape. Try to undo that.
2272
2273  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2274  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2275
2276  // Given an OR operation, check if we have the following pattern:
2277  // ubfm c, b, imm, imm2 (or something that does the same job, see
2278  //                       isBitfieldExtractOp)
2279  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2280  //                 countTrailingZeros(mask2) == imm2 - imm + 1
2281  // f = d | c
2282  // If so, replace the OR instruction with:
2283  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2284
2285  // OR is commutative, check all combinations of operand order and values of
2286  // BiggerPattern, i.e.
2287  //     Opd0, Opd1, BiggerPattern=false
2288  //     Opd1, Opd0, BiggerPattern=false
2289  //     Opd0, Opd1, BiggerPattern=true
2290  //     Opd1, Opd0, BiggerPattern=true
2291  // Several of these combinations may match, so check with BiggerPattern=false
2292  // first since that will produce better results by matching more instructions
2293  // and/or inserting fewer extra instructions.
2294  for (int I = 0; I < 4; ++I) {
2295
2296    SDValue Dst, Src;
2297    unsigned ImmR, ImmS;
2298    bool BiggerPattern = I / 2;
2299    SDValue OrOpd0Val = N->getOperand(I % 2);
2300    SDNode *OrOpd0 = OrOpd0Val.getNode();
2301    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2302    SDNode *OrOpd1 = OrOpd1Val.getNode();
2303
2304    unsigned BFXOpc;
2305    int DstLSB, Width;
2306    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2307                            NumberOfIgnoredLowBits, BiggerPattern)) {
2308      // Check that the returned opcode is compatible with the pattern,
2309      // i.e., same type and zero extended (U and not S)
2310      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2311          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2312        continue;
2313
2314      // Compute the width of the bitfield insertion
2315      DstLSB = 0;
2316      Width = ImmS - ImmR + 1;
2317      // FIXME: This constraint is to catch bitfield insertion; we may
2318      // want to widen the pattern if we want to grab the general bitfield
2319      // move case.
2320      if (Width <= 0)
2321        continue;
2322
2323      // If the mask on the insertee is correct, we have a BFXIL operation. We
2324      // can share the ImmR and ImmS values from the already-computed UBFM.
2325    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2326                                       BiggerPattern,
2327                                       Src, DstLSB, Width)) {
2328      ImmR = (BitWidth - DstLSB) % BitWidth;
2329      ImmS = Width - 1;
2330    } else
2331      continue;
2332
2333    // Check the second part of the pattern
2334    EVT VT = OrOpd1Val.getValueType();
2335    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2336
2337    // Compute the known-zero bits for the candidate of the first operand.
2338    // This allows us to catch more general cases than just looking for an
2339    // AND with an immediate. Indeed, simplify-demanded-bits may have removed
2340    // the AND instruction because it proved it was useless.
2341    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2342
2343    // Check if there is enough room for the second operand to appear
2344    // in the first one
2345    APInt BitsToBeInserted =
2346        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2347
2348    if ((BitsToBeInserted & ~Known.Zero) != 0)
2349      continue;
2350
2351    // Set the first operand
2352    uint64_t Imm;
2353    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2354        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2355      // In that case, we can eliminate the AND
2356      Dst = OrOpd1->getOperand(0);
2357    else
2358      // Maybe the AND has been removed by simplify-demanded-bits
2359      // or is useful because it discards more bits
2360      Dst = OrOpd1Val;
2361
2362    // both parts match
2363    SDLoc DL(N);
2364    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2365                     CurDAG->getTargetConstant(ImmS, DL, VT)};
2366    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2367    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2368    return true;
2369  }
2370
2371  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2372  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2373  // mask (e.g., 0x000ffff0).
2374  uint64_t Mask0Imm, Mask1Imm;
2375  SDValue And0 = N->getOperand(0);
2376  SDValue And1 = N->getOperand(1);
2377  if (And0.hasOneUse() && And1.hasOneUse() &&
2378      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2379      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2380      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2381      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2382
2383    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2384    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2385    // bits to be inserted.
2386    if (isShiftedMask(Mask0Imm, VT)) {
2387      std::swap(And0, And1);
2388      std::swap(Mask0Imm, Mask1Imm);
2389    }
2390
2391    SDValue Src = And1->getOperand(0);
2392    SDValue Dst = And0->getOperand(0);
2393    unsigned LSB = countTrailingZeros(Mask1Imm);
2394    int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2395
2396    // The BFXIL inserts the low-order bits from a source register, so right
2397    // shift the needed bits into place.
2398    SDLoc DL(N);
2399    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2400    SDNode *LSR = CurDAG->getMachineNode(
2401        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2402        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2403
2404    // BFXIL is an alias of BFM, so translate to BFM operands.
2405    unsigned ImmR = (BitWidth - LSB) % BitWidth;
2406    unsigned ImmS = Width - 1;
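    // An illustrative example (assumed masks): with Mask1Imm = 0x000ffff0 on
    // i32 (so Mask0Imm = 0xfff0000f), LSB = 4 and Width = 16, giving
    // ImmR = 28 and ImmS = 15.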
2407
2408    // Create the BFXIL instruction.
2409    SDValue Ops[] = {Dst, SDValue(LSR, 0),
2410                     CurDAG->getTargetConstant(ImmR, DL, VT),
2411                     CurDAG->getTargetConstant(ImmS, DL, VT)};
2412    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2413    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2414    return true;
2415  }
2416
2417  return false;
2418}
2419
2420bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2421  if (N->getOpcode() != ISD::OR)
2422    return false;
2423
2424  APInt NUsefulBits;
2425  getUsefulBits(SDValue(N, 0), NUsefulBits);
2426
2427  // If none of the bits are useful, just replace the node with UNDEF.
2428  if (!NUsefulBits) {
2429    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2430    return true;
2431  }
2432
2433  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2434    return true;
2435
2436  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2437}
2438
2439/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2440/// equivalent of a left shift by a constant amount followed by an AND masking
2441/// out a contiguous set of bits.
2442bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2443  if (N->getOpcode() != ISD::AND)
2444    return false;
2445
2446  EVT VT = N->getValueType(0);
2447  if (VT != MVT::i32 && VT != MVT::i64)
2448    return false;
2449
2450  SDValue Op0;
2451  int DstLSB, Width;
2452  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2453                               Op0, DstLSB, Width))
2454    return false;
2455
2456  // ImmR is the rotate right amount.
2457  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2458  // ImmS is the most significant bit of the source to be moved.
2459  unsigned ImmS = Width - 1;
2460
2461  SDLoc DL(N);
2462  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2463                   CurDAG->getTargetConstant(ImmS, DL, VT)};
2464  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2465  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2466  return true;
2467}
2468
2469/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2470/// variable shift/rotate instructions.
2471bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2472  EVT VT = N->getValueType(0);
2473
2474  unsigned Opc;
2475  switch (N->getOpcode()) {
2476  case ISD::ROTR:
2477    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2478    break;
2479  case ISD::SHL:
2480    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2481    break;
2482  case ISD::SRL:
2483    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2484    break;
2485  case ISD::SRA:
2486    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2487    break;
2488  default:
2489    return false;
2490  }
2491
2492  uint64_t Size;
2493  uint64_t Bits;
2494  if (VT == MVT::i32) {
2495    Bits = 5;
2496    Size = 32;
2497  } else if (VT == MVT::i64) {
2498    Bits = 6;
2499    Size = 64;
2500  } else
2501    return false;
2502
2503  SDValue ShiftAmt = N->getOperand(1);
2504  SDLoc DL(N);
2505  SDValue NewShiftAmt;
2506
2507  // Skip over an extend of the shift amount.
2508  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2509      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2510    ShiftAmt = ShiftAmt->getOperand(0);
2511
2512  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2513    SDValue Add0 = ShiftAmt->getOperand(0);
2514    SDValue Add1 = ShiftAmt->getOperand(1);
2515    uint64_t Add0Imm;
2516    uint64_t Add1Imm;
2517    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2518    // to avoid the ADD/SUB.
2519    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2520      NewShiftAmt = Add0;
2521    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2522    // generate a NEG instead of a SUB of a constant.
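    // (An assumed example: on i32, a shift amount of (sub 32, Y) becomes
    // NEG Y, i.e. SUB wzr, Y, since only the low 5 bits of the amount are
    // read by the variable shift.)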
2523    else if (ShiftAmt->getOpcode() == ISD::SUB &&
2524             isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2525             (Add0Imm % Size == 0)) {
2526      unsigned NegOpc;
2527      unsigned ZeroReg;
2528      EVT SubVT = ShiftAmt->getValueType(0);
2529      if (SubVT == MVT::i32) {
2530        NegOpc = AArch64::SUBWrr;
2531        ZeroReg = AArch64::WZR;
2532      } else {
2533        assert(SubVT == MVT::i64);
2534        NegOpc = AArch64::SUBXrr;
2535        ZeroReg = AArch64::XZR;
2536      }
2537      SDValue Zero =
2538          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2539      MachineSDNode *Neg =
2540          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2541      NewShiftAmt = SDValue(Neg, 0);
2542    } else
2543      return false;
2544  } else {
2545    // If the shift amount is masked with an AND, check that the mask covers the
2546    // bits that are implicitly ANDed off by the above opcodes and if so, skip
2547    // the AND.
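    // (An assumed example: for a 64-bit shift, (shl X, (and Y, 0x3f)) can use
    // Y directly, since LSLVXr itself only reads the low 6 bits of Y.)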
2548    uint64_t MaskImm;
2549    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2550      return false;
2551
2552    if (countTrailingOnes(MaskImm) < Bits)
2553      return false;
2554
2555    NewShiftAmt = ShiftAmt->getOperand(0);
2556  }
2557
2558  // Narrow/widen the shift amount to match the size of the shift operation.
2559  if (VT == MVT::i32)
2560    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2561  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2562    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2563    MachineSDNode *Ext = CurDAG->getMachineNode(
2564        AArch64::SUBREG_TO_REG, DL, VT,
2565        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2566    NewShiftAmt = SDValue(Ext, 0);
2567  }
2568
2569  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2570  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2571  return true;
2572}
2573
2574bool
2575AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2576                                              unsigned RegWidth) {
2577  APFloat FVal(0.0);
2578  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2579    FVal = CN->getValueAPF();
2580  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2581    // Some otherwise illegal constants are allowed in this case.
2582    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2583        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2584      return false;
2585
2586    ConstantPoolSDNode *CN =
2587        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2588    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2589  } else
2590    return false;
2591
2592  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2593  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2594  // x-register.
2595  //
2596  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2597  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2598  // integers.
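  // (An illustrative case: an fmul by 65536.0 converts exactly to 2^16, so
  // FBits is 16 and the eventual FCVT uses #16 fractional bits.)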
2599  bool IsExact;
2600
2601  // fbits is between 1 and 64 in the worst-case, which means the fmul
2602  // could have 2^64 as an actual operand. Need 65 bits of precision.
2603  APSInt IntVal(65, true);
2604  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2605
2606  // N.b. isPowerOf2 also checks for > 0.
2607  if (!IsExact || !IntVal.isPowerOf2()) return false;
2608  unsigned FBits = IntVal.logBase2();
2609
2610  // Checks above should have guaranteed that we haven't lost information in
2611  // finding FBits, but it must still be in range.
2612  if (FBits == 0 || FBits > RegWidth) return false;
2613
2614  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2615  return true;
2616}
2617
2618// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the
2619// fields of the string, obtains the integer values from them, and combines
2620// these into a single value to be used in the MRS/MSR instruction.
2621static int getIntOperandFromRegisterString(StringRef RegString) {
2622  SmallVector<StringRef, 5> Fields;
2623  RegString.split(Fields, ':');
2624
2625  if (Fields.size() == 1)
2626    return -1;
2627
2628  assert(Fields.size() == 5 &&
2629         "Invalid number of fields in read register string");
2630
2631  SmallVector<int, 5> Ops;
2632  bool AllIntFields = true;
2633
2634  for (StringRef Field : Fields) {
2635    unsigned IntField;
2636    AllIntFields &= !Field.getAsInteger(10, IntField);
2637    Ops.push_back(IntField);
2638  }
2639
2640  assert(AllIntFields &&
2641          "Unexpected non-integer value in special register string.");
2642
2643  // Need to combine the integer fields of the string into a single value
2644  // based on the bit encoding of the MRS/MSR instruction.
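  // (An assumed example: the string "3:3:13:0:2", i.e. TPIDR_EL0, yields
  // (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xde82.)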
2645  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2646         (Ops[3] << 3) | (Ops[4]);
2647}
2648
2649// Lower the read_register intrinsic to an MRS instruction node if the special
2650// register string argument is either of the form detailed in the ACLE (the
2651// form described in getIntOperandFromRegisterString) or is a named register
2652// known by the MRS SysReg mapper.
2653bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2654  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2655  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2656  SDLoc DL(N);
2657
2658  int Reg = getIntOperandFromRegisterString(RegString->getString());
2659  if (Reg != -1) {
2660    ReplaceNode(N, CurDAG->getMachineNode(
2661                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2662                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2663                       N->getOperand(0)));
2664    return true;
2665  }
2666
2667  // Use the sysreg mapper to map the remaining possible strings to the
2668  // value for the register to be used for the instruction operand.
2669  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2670  if (TheReg && TheReg->Readable &&
2671      TheReg->haveFeatures(Subtarget->getFeatureBits()))
2672    Reg = TheReg->Encoding;
2673  else
2674    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2675
2676  if (Reg != -1) {
2677    ReplaceNode(N, CurDAG->getMachineNode(
2678                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2679                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2680                       N->getOperand(0)));
2681    return true;
2682  }
2683
2684  if (RegString->getString() == "pc") {
2685    ReplaceNode(N, CurDAG->getMachineNode(
2686                       AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2687                       CurDAG->getTargetConstant(0, DL, MVT::i32),
2688                       N->getOperand(0)));
2689    return true;
2690  }
2691
2692  return false;
2693}
2694
2695// Lower the write_register intrinsic to an MSR instruction node if the special
2696// register string argument is either of the form detailed in the ACLE (the
2697// form described in getIntOperandFromRegisterString) or is a named register
2698// known by the MSR SysReg mapper.
2699bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2700  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2701  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2702  SDLoc DL(N);
2703
2704  int Reg = getIntOperandFromRegisterString(RegString->getString());
2705  if (Reg != -1) {
2706    ReplaceNode(
2707        N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2708                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2709                                  N->getOperand(2), N->getOperand(0)));
2710    return true;
2711  }
2712
2713  // Check if the register was one of those allowed as the pstatefield value
2714  // in the MSR (immediate) instruction. To accept the values allowed in the
2715  // pstatefield for the MSR (immediate) instruction, we also require that an
2716  // immediate value has been provided as an argument; we know this is the
2717  // case, as it has been ensured by semantic checking.
2718  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2719  if (PMapper) {
2720    assert(isa<ConstantSDNode>(N->getOperand(2)) &&
2721           "Expected a constant integer expression.");
2722    unsigned Reg = PMapper->Encoding;
2723    uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2724    unsigned State;
2725    if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2726      assert(Immed < 2 && "Bad imm");
2727      State = AArch64::MSRpstateImm1;
2728    } else {
2729      assert(Immed < 16 && "Bad imm");
2730      State = AArch64::MSRpstateImm4;
2731    }
2732    ReplaceNode(N, CurDAG->getMachineNode(
2733                       State, DL, MVT::Other,
2734                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2735                       CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2736                       N->getOperand(0)));
2737    return true;
2738  }
2739
2740  // Use the sysreg mapper to attempt to map the remaining possible strings
2741  // to the value for the register to be used for the MSR (register)
2742  // instruction operand.
2743  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2744  if (TheReg && TheReg->Writeable &&
2745      TheReg->haveFeatures(Subtarget->getFeatureBits()))
2746    Reg = TheReg->Encoding;
2747  else
2748    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2749  if (Reg != -1) {
2750    ReplaceNode(N, CurDAG->getMachineNode(
2751                       AArch64::MSR, DL, MVT::Other,
2752                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2753                       N->getOperand(2), N->getOperand(0)));
2754    return true;
2755  }
2756
2757  return false;
2758}
2759
2760/// We've got special pseudo-instructions for these
2761bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2762  unsigned Opcode;
2763  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2764
2765  // Leave IR for LSE if the subtarget supports it.
2766  if (Subtarget->hasLSE()) return false;
2767
2768  if (MemTy == MVT::i8)
2769    Opcode = AArch64::CMP_SWAP_8;
2770  else if (MemTy == MVT::i16)
2771    Opcode = AArch64::CMP_SWAP_16;
2772  else if (MemTy == MVT::i32)
2773    Opcode = AArch64::CMP_SWAP_32;
2774  else if (MemTy == MVT::i64)
2775    Opcode = AArch64::CMP_SWAP_64;
2776  else
2777    llvm_unreachable("Unknown AtomicCmpSwap type");
2778
2779  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2780  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2781                   N->getOperand(0)};
2782  SDNode *CmpSwap = CurDAG->getMachineNode(
2783      Opcode, SDLoc(N),
2784      CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2785
2786  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2787  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2788
2789  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2790  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2791  CurDAG->RemoveDeadNode(N);
2792
2793  return true;
2794}
2795
2796bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
2797  // tagp(FrameIndex, IRGstack, tag_offset):
2798  // since the offset between FrameIndex and IRGstack is a compile-time
2799  // constant, this can be lowered to a single ADDG instruction.
2800  if (!isa<FrameIndexSDNode>(N->getOperand(1))) {
2801    return false;
2802  }
2803
2804  SDValue IRG_SP = N->getOperand(2);
2805  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
2806      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
2807          Intrinsic::aarch64_irg_sp) {
2808    return false;
2809  }
2810
2811  const TargetLowering *TLI = getTargetLowering();
2812  SDLoc DL(N);
2813  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
2814  SDValue FiOp = CurDAG->getTargetFrameIndex(
2815      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2816  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2817
2818  SDNode *Out = CurDAG->getMachineNode(
2819      AArch64::TAGPstack, DL, MVT::i64,
2820      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
2821       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2822  ReplaceNode(N, Out);
2823  return true;
2824}
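// Illustrative example (hypothetical IR): a call such as
//   %p = call i8* @llvm.aarch64.tagp.p0i8(i8* %slot, i8* %base, i64 3)
// with %slot a fixed stack object and %base the result of llvm.aarch64.irg.sp
// folds into a single TAGPstack pseudo, i.e. an ADDG of the frame address
// with logical tag offset 3.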
2825
2826void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
2827  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
2828         "llvm.aarch64.tagp third argument must be an immediate");
2829  if (trySelectStackSlotTagP(N))
2830    return;
2831  // FIXME: the above applies whenever the offset between Op1 and Op2 is a
2832  // compile-time constant, not just for stack allocations.
2833
2834  // General case for unrelated pointers in Op1 and Op2.
2835  SDLoc DL(N);
2836  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2837  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
2838                                      {N->getOperand(1), N->getOperand(2)});
2839  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
2840                                      {SDValue(N1, 0), N->getOperand(2)});
2841  SDNode *N3 = CurDAG->getMachineNode(
2842      AArch64::ADDG, DL, MVT::i64,
2843      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
2844       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2845  ReplaceNode(N, N3);
2846}
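// Note: in the general case above, SUBP computes the signed difference of the
// two pointers' 56-bit address fields, stripping Op1's tag; adding Op2 back
// with ADDXrr restores Op1's address bits under Op2's tag, and the final ADDG
// applies the requested logical tag offset.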
2847
2848void AArch64DAGToDAGISel::Select(SDNode *Node) {
2849  // If we have a custom node, we have already selected it!
2850  if (Node->isMachineOpcode()) {
2851    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2852    Node->setNodeId(-1);
2853    return;
2854  }
2855
2856  // A few cases need custom selection.
2857  EVT VT = Node->getValueType(0);
2858
2859  switch (Node->getOpcode()) {
2860  default:
2861    break;
2862
2863  case ISD::ATOMIC_CMP_SWAP:
2864    if (SelectCMP_SWAP(Node))
2865      return;
2866    break;
2867
2868  case ISD::READ_REGISTER:
2869    if (tryReadRegister(Node))
2870      return;
2871    break;
2872
2873  case ISD::WRITE_REGISTER:
2874    if (tryWriteRegister(Node))
2875      return;
2876    break;
2877
2878  case ISD::ADD:
2879    if (tryMLAV64LaneV128(Node))
2880      return;
2881    break;
2882
2883  case ISD::LOAD: {
2884    // Try to select as an indexed load. Fall through to normal processing
2885    // if we can't.
2886    if (tryIndexedLoad(Node))
2887      return;
2888    break;
2889  }
2890
2891  case ISD::SRL:
2892  case ISD::AND:
2893  case ISD::SRA:
2894  case ISD::SIGN_EXTEND_INREG:
2895    if (tryBitfieldExtractOp(Node))
2896      return;
2897    if (tryBitfieldInsertInZeroOp(Node))
2898      return;
2899    LLVM_FALLTHROUGH;
2900  case ISD::ROTR:
2901  case ISD::SHL:
2902    if (tryShiftAmountMod(Node))
2903      return;
2904    break;
2905
2906  case ISD::SIGN_EXTEND:
2907    if (tryBitfieldExtractOpFromSExt(Node))
2908      return;
2909    break;
2910
2911  case ISD::OR:
2912    if (tryBitfieldInsertOp(Node))
2913      return;
2914    break;
2915
2916  case ISD::EXTRACT_VECTOR_ELT: {
2917    // Extracting lane zero is a special case where we can just use a plain
2918    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2919    // the rest of the compiler, especially the register allocator and copy
2920    // propagation, to reason about, so is preferred when it's possible to
2921    // use it.
2922    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2923    // Bail and use the default Select() for non-zero lanes.
2924    if (LaneNode->getZExtValue() != 0)
2925      break;
2926    // If the element type is not the same as the result type, likewise
2927    // bail and use the default Select(), as there's more to do than just
2928    // a cross-class COPY. This catches extracts of i8 and i16 elements
2929    // since they will need an explicit zext.
2930    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2931      break;
2932    unsigned SubReg;
2933    switch (Node->getOperand(0)
2934                .getValueType()
2935                .getVectorElementType()
2936                .getSizeInBits()) {
2937    default:
2938      llvm_unreachable("Unexpected vector element type!");
2939    case 64:
2940      SubReg = AArch64::dsub;
2941      break;
2942    case 32:
2943      SubReg = AArch64::ssub;
2944      break;
2945    case 16:
2946      SubReg = AArch64::hsub;
2947      break;
2948    case 8:
2949      llvm_unreachable("unexpected zext-requiring extract element!");
2950    }
2951    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2952                                                     Node->getOperand(0));
2953    LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2954    LLVM_DEBUG(Extract->dumpr(CurDAG));
2955    LLVM_DEBUG(dbgs() << "\n");
2956    ReplaceNode(Node, Extract.getNode());
2957    return;
2958  }
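  // For illustration: extracting lane 0 of a v2f64 as f64 thus becomes a dsub
  // EXTRACT_SUBREG: at worst a single FMOV, and often no instruction at all
  // once the register allocator coalesces the copy.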
2959  case ISD::Constant: {
2960    // Materialize zero constants as copies from WZR/XZR.  This allows
2961    // the coalescer to propagate these into other instructions.
2962    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2963    if (ConstNode->isNullValue()) {
2964      if (VT == MVT::i32) {
2965        SDValue New = CurDAG->getCopyFromReg(
2966            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2967        ReplaceNode(Node, New.getNode());
2968        return;
2969      } else if (VT == MVT::i64) {
2970        SDValue New = CurDAG->getCopyFromReg(
2971            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2972        ReplaceNode(Node, New.getNode());
2973        return;
2974      }
2975    }
2976    break;
2977  }
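  // For illustration: "return 0" from a function returning i32 selects to a
  // copy from WZR here, which typically ends up as "mov w0, wzr" or is
  // coalesced away entirely.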
2978
2979  case ISD::FrameIndex: {
2980    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2981    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2982    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2983    const TargetLowering *TLI = getTargetLowering();
2984    SDValue TFI = CurDAG->getTargetFrameIndex(
2985        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2986    SDLoc DL(Node);
2987    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2988                      CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2989    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2990    return;
2991  }
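  // Note: keeping the ADDXri at offset 0 lets frame-index elimination later
  // rewrite it against SP with the real offset folded into the immediate,
  // e.g. "add x0, sp, #16".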
2992  case ISD::INTRINSIC_W_CHAIN: {
2993    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2994    switch (IntNo) {
2995    default:
2996      break;
2997    case Intrinsic::aarch64_ldaxp:
2998    case Intrinsic::aarch64_ldxp: {
2999      unsigned Op =
3000          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3001      SDValue MemAddr = Node->getOperand(2);
3002      SDLoc DL(Node);
3003      SDValue Chain = Node->getOperand(0);
3004
3005      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
3006                                          MVT::Other, MemAddr, Chain);
3007
3008      // Transfer memoperands.
3009      MachineMemOperand *MemOp =
3010          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3011      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3012      ReplaceNode(Node, Ld);
3013      return;
3014    }
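    // Note: LDAXP/LDXP load an exclusive 128-bit pair into two X registers,
    // hence the (i64, i64, Other) result list; e.g. "ldaxp x0, x1, [x2]".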
3015    case Intrinsic::aarch64_stlxp:
3016    case Intrinsic::aarch64_stxp: {
3017      unsigned Op =
3018          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3019      SDLoc DL(Node);
3020      SDValue Chain = Node->getOperand(0);
3021      SDValue ValLo = Node->getOperand(2);
3022      SDValue ValHi = Node->getOperand(3);
3023      SDValue MemAddr = Node->getOperand(4);
3024
3025      // Place arguments in the right order.
3026      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3027
3028      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
3029      // Transfer memoperands.
3030      MachineMemOperand *MemOp =
3031          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3032      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3033
3034      ReplaceNode(Node, St);
3035      return;
3036    }
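    // Note: the i32 result is the store-exclusive status register, which
    // reads 0 on success and 1 if the exclusive monitor was lost;
    // e.g. "stlxp w0, x1, x2, [x3]".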
3037    case Intrinsic::aarch64_neon_ld1x2:
3038      if (VT == MVT::v8i8) {
3039        SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3040        return;
3041      } else if (VT == MVT::v16i8) {
3042        SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3043        return;
3044      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3045        SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3046        return;
3047      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3048        SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3049        return;
3050      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3051        SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3052        return;
3053      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3054        SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3055        return;
3056      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3057        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3058        return;
3059      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3060        SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3061        return;
3062      }
3063      break;
3064    case Intrinsic::aarch64_neon_ld1x3:
3065      if (VT == MVT::v8i8) {
3066        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3067        return;
3068      } else if (VT == MVT::v16i8) {
3069        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3070        return;
3071      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3072        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3073        return;
3074      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3075        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3076        return;
3077      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3078        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3079        return;
3080      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3081        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3082        return;
3083      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3084        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3085        return;
3086      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3087        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3088        return;
3089      }
3090      break;
3091    case Intrinsic::aarch64_neon_ld1x4:
3092      if (VT == MVT::v8i8) {
3093        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3094        return;
3095      } else if (VT == MVT::v16i8) {
3096        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3097        return;
3098      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3099        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3100        return;
3101      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3102        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3103        return;
3104      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3105        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3106        return;
3107      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3108        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3109        return;
3110      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3111        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3112        return;
3113      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3114        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3115        return;
3116      }
3117      break;
3118    case Intrinsic::aarch64_neon_ld2:
3119      if (VT == MVT::v8i8) {
3120        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3121        return;
3122      } else if (VT == MVT::v16i8) {
3123        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3124        return;
3125      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3126        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3127        return;
3128      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3129        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3130        return;
3131      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3132        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3133        return;
3134      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3135        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3136        return;
3137      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3138        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3139        return;
3140      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3141        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3142        return;
3143      }
3144      break;
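      // Note: LD2/LD3/LD4 have no ".1d" arrangement, so the v1i64/v1f64 cases
      // above use the equivalent consecutive LD1 forms; with one element per
      // vector there is nothing to de-interleave.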
3145    case Intrinsic::aarch64_neon_ld3:
3146      if (VT == MVT::v8i8) {
3147        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3148        return;
3149      } else if (VT == MVT::v16i8) {
3150        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3151        return;
3152      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3153        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3154        return;
3155      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3156        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3157        return;
3158      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3159        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3160        return;
3161      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3162        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3163        return;
3164      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3165        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3166        return;
3167      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3168        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3169        return;
3170      }
3171      break;
3172    case Intrinsic::aarch64_neon_ld4:
3173      if (VT == MVT::v8i8) {
3174        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3175        return;
3176      } else if (VT == MVT::v16i8) {
3177        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3178        return;
3179      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3180        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3181        return;
3182      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3183        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3184        return;
3185      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3186        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3187        return;
3188      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3189        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3190        return;
3191      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3192        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3193        return;
3194      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3195        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3196        return;
3197      }
3198      break;
3199    case Intrinsic::aarch64_neon_ld2r:
3200      if (VT == MVT::v8i8) {
3201        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3202        return;
3203      } else if (VT == MVT::v16i8) {
3204        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3205        return;
3206      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3207        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3208        return;
3209      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3210        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3211        return;
3212      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3213        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3214        return;
3215      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3216        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3217        return;
3218      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3219        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3220        return;
3221      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3222        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3223        return;
3224      }
3225      break;
3226    case Intrinsic::aarch64_neon_ld3r:
3227      if (VT == MVT::v8i8) {
3228        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3229        return;
3230      } else if (VT == MVT::v16i8) {
3231        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3232        return;
3233      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3234        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3235        return;
3236      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3237        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3238        return;
3239      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3240        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3241        return;
3242      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3243        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3244        return;
3245      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3246        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3247        return;
3248      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3249        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3250        return;
3251      }
3252      break;
3253    case Intrinsic::aarch64_neon_ld4r:
3254      if (VT == MVT::v8i8) {
3255        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3256        return;
3257      } else if (VT == MVT::v16i8) {
3258        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3259        return;
3260      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3261        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3262        return;
3263      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3264        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3265        return;
3266      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3267        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3268        return;
3269      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3270        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3271        return;
3272      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3273        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3274        return;
3275      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3276        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3277        return;
3278      }
3279      break;
3280    case Intrinsic::aarch64_neon_ld2lane:
3281      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3282        SelectLoadLane(Node, 2, AArch64::LD2i8);
3283        return;
3284      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3285                 VT == MVT::v8f16) {
3286        SelectLoadLane(Node, 2, AArch64::LD2i16);
3287        return;
3288      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3289                 VT == MVT::v2f32) {
3290        SelectLoadLane(Node, 2, AArch64::LD2i32);
3291        return;
3292      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3293                 VT == MVT::v1f64) {
3294        SelectLoadLane(Node, 2, AArch64::LD2i64);
3295        return;
3296      }
3297      break;
3298    case Intrinsic::aarch64_neon_ld3lane:
3299      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3300        SelectLoadLane(Node, 3, AArch64::LD3i8);
3301        return;
3302      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3303                 VT == MVT::v8f16) {
3304        SelectLoadLane(Node, 3, AArch64::LD3i16);
3305        return;
3306      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3307                 VT == MVT::v2f32) {
3308        SelectLoadLane(Node, 3, AArch64::LD3i32);
3309        return;
3310      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3311                 VT == MVT::v1f64) {
3312        SelectLoadLane(Node, 3, AArch64::LD3i64);
3313        return;
3314      }
3315      break;
3316    case Intrinsic::aarch64_neon_ld4lane:
3317      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3318        SelectLoadLane(Node, 4, AArch64::LD4i8);
3319        return;
3320      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3321                 VT == MVT::v8f16) {
3322        SelectLoadLane(Node, 4, AArch64::LD4i16);
3323        return;
3324      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3325                 VT == MVT::v2f32) {
3326        SelectLoadLane(Node, 4, AArch64::LD4i32);
3327        return;
3328      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3329                 VT == MVT::v1f64) {
3330        SelectLoadLane(Node, 4, AArch64::LD4i64);
3331        return;
3332      }
3333      break;
3334    }
3335    break;
  }
3336  case ISD::INTRINSIC_WO_CHAIN: {
3337    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3338    switch (IntNo) {
3339    default:
3340      break;
3341    case Intrinsic::aarch64_tagp:
3342      SelectTagP(Node);
3343      return;
3344    case Intrinsic::aarch64_neon_tbl2:
3345      SelectTable(Node, 2,
3346                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3347                  false);
3348      return;
3349    case Intrinsic::aarch64_neon_tbl3:
3350      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3351                                           : AArch64::TBLv16i8Three,
3352                  false);
3353      return;
3354    case Intrinsic::aarch64_neon_tbl4:
3355      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3356                                           : AArch64::TBLv16i8Four,
3357                  false);
3358      return;
3359    case Intrinsic::aarch64_neon_tbx2:
3360      SelectTable(Node, 2,
3361                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3362                  true);
3363      return;
3364    case Intrinsic::aarch64_neon_tbx3:
3365      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3366                                           : AArch64::TBXv16i8Three,
3367                  true);
3368      return;
3369    case Intrinsic::aarch64_neon_tbx4:
3370      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3371                                           : AArch64::TBXv16i8Four,
3372                  true);
3373      return;
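    // Note: the trailing boolean selects TBX ("true") over TBL. TBL writes
    // zero for out-of-range indices, while TBX leaves those destination
    // elements unchanged and therefore consumes the destination as an extra
    // operand.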
3374    case Intrinsic::aarch64_neon_smull:
3375    case Intrinsic::aarch64_neon_umull:
3376      if (tryMULLV64LaneV128(IntNo, Node))
3377        return;
3378      break;
3379    }
3380    break;
3381  }
3382  case ISD::INTRINSIC_VOID: {
3383    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3384    if (Node->getNumOperands() >= 3)
3385      VT = Node->getOperand(2)->getValueType(0);
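    // Note: these void store intrinsics produce no result value, so the
    // vector type must be taken from the first stored operand rather than
    // from the node itself.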
3386    switch (IntNo) {
3387    default:
3388      break;
3389    case Intrinsic::aarch64_neon_st1x2: {
3390      if (VT == MVT::v8i8) {
3391        SelectStore(Node, 2, AArch64::ST1Twov8b);
3392        return;
3393      } else if (VT == MVT::v16i8) {
3394        SelectStore(Node, 2, AArch64::ST1Twov16b);
3395        return;
3396      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3397        SelectStore(Node, 2, AArch64::ST1Twov4h);
3398        return;
3399      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3400        SelectStore(Node, 2, AArch64::ST1Twov8h);
3401        return;
3402      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3403        SelectStore(Node, 2, AArch64::ST1Twov2s);
3404        return;
3405      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3406        SelectStore(Node, 2, AArch64::ST1Twov4s);
3407        return;
3408      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3409        SelectStore(Node, 2, AArch64::ST1Twov2d);
3410        return;
3411      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3412        SelectStore(Node, 2, AArch64::ST1Twov1d);
3413        return;
3414      }
3415      break;
3416    }
3417    case Intrinsic::aarch64_neon_st1x3: {
3418      if (VT == MVT::v8i8) {
3419        SelectStore(Node, 3, AArch64::ST1Threev8b);
3420        return;
3421      } else if (VT == MVT::v16i8) {
3422        SelectStore(Node, 3, AArch64::ST1Threev16b);
3423        return;
3424      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3425        SelectStore(Node, 3, AArch64::ST1Threev4h);
3426        return;
3427      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3428        SelectStore(Node, 3, AArch64::ST1Threev8h);
3429        return;
3430      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3431        SelectStore(Node, 3, AArch64::ST1Threev2s);
3432        return;
3433      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3434        SelectStore(Node, 3, AArch64::ST1Threev4s);
3435        return;
3436      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3437        SelectStore(Node, 3, AArch64::ST1Threev2d);
3438        return;
3439      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3440        SelectStore(Node, 3, AArch64::ST1Threev1d);
3441        return;
3442      }
3443      break;
3444    }
3445    case Intrinsic::aarch64_neon_st1x4: {
3446      if (VT == MVT::v8i8) {
3447        SelectStore(Node, 4, AArch64::ST1Fourv8b);
3448        return;
3449      } else if (VT == MVT::v16i8) {
3450        SelectStore(Node, 4, AArch64::ST1Fourv16b);
3451        return;
3452      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3453        SelectStore(Node, 4, AArch64::ST1Fourv4h);
3454        return;
3455      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3456        SelectStore(Node, 4, AArch64::ST1Fourv8h);
3457        return;
3458      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3459        SelectStore(Node, 4, AArch64::ST1Fourv2s);
3460        return;
3461      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3462        SelectStore(Node, 4, AArch64::ST1Fourv4s);
3463        return;
3464      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3465        SelectStore(Node, 4, AArch64::ST1Fourv2d);
3466        return;
3467      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3468        SelectStore(Node, 4, AArch64::ST1Fourv1d);
3469        return;
3470      }
3471      break;
3472    }
3473    case Intrinsic::aarch64_neon_st2: {
3474      if (VT == MVT::v8i8) {
3475        SelectStore(Node, 2, AArch64::ST2Twov8b);
3476        return;
3477      } else if (VT == MVT::v16i8) {
3478        SelectStore(Node, 2, AArch64::ST2Twov16b);
3479        return;
3480      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3481        SelectStore(Node, 2, AArch64::ST2Twov4h);
3482        return;
3483      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3484        SelectStore(Node, 2, AArch64::ST2Twov8h);
3485        return;
3486      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3487        SelectStore(Node, 2, AArch64::ST2Twov2s);
3488        return;
3489      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3490        SelectStore(Node, 2, AArch64::ST2Twov4s);
3491        return;
3492      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3493        SelectStore(Node, 2, AArch64::ST2Twov2d);
3494        return;
3495      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3496        SelectStore(Node, 2, AArch64::ST1Twov1d);
3497        return;
3498      }
3499      break;
3500    }
3501    case Intrinsic::aarch64_neon_st3: {
3502      if (VT == MVT::v8i8) {
3503        SelectStore(Node, 3, AArch64::ST3Threev8b);
3504        return;
3505      } else if (VT == MVT::v16i8) {
3506        SelectStore(Node, 3, AArch64::ST3Threev16b);
3507        return;
3508      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3509        SelectStore(Node, 3, AArch64::ST3Threev4h);
3510        return;
3511      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3512        SelectStore(Node, 3, AArch64::ST3Threev8h);
3513        return;
3514      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3515        SelectStore(Node, 3, AArch64::ST3Threev2s);
3516        return;
3517      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3518        SelectStore(Node, 3, AArch64::ST3Threev4s);
3519        return;
3520      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3521        SelectStore(Node, 3, AArch64::ST3Threev2d);
3522        return;
3523      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3524        SelectStore(Node, 3, AArch64::ST1Threev1d);
3525        return;
3526      }
3527      break;
3528    }
3529    case Intrinsic::aarch64_neon_st4: {
3530      if (VT == MVT::v8i8) {
3531        SelectStore(Node, 4, AArch64::ST4Fourv8b);
3532        return;
3533      } else if (VT == MVT::v16i8) {
3534        SelectStore(Node, 4, AArch64::ST4Fourv16b);
3535        return;
3536      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3537        SelectStore(Node, 4, AArch64::ST4Fourv4h);
3538        return;
3539      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3540        SelectStore(Node, 4, AArch64::ST4Fourv8h);
3541        return;
3542      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3543        SelectStore(Node, 4, AArch64::ST4Fourv2s);
3544        return;
3545      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3546        SelectStore(Node, 4, AArch64::ST4Fourv4s);
3547        return;
3548      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3549        SelectStore(Node, 4, AArch64::ST4Fourv2d);
3550        return;
3551      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3552        SelectStore(Node, 4, AArch64::ST1Fourv1d);
3553        return;
3554      }
3555      break;
3556    }
3557    case Intrinsic::aarch64_neon_st2lane: {
3558      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3559        SelectStoreLane(Node, 2, AArch64::ST2i8);
3560        return;
3561      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3562                 VT == MVT::v8f16) {
3563        SelectStoreLane(Node, 2, AArch64::ST2i16);
3564        return;
3565      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3566                 VT == MVT::v2f32) {
3567        SelectStoreLane(Node, 2, AArch64::ST2i32);
3568        return;
3569      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3570                 VT == MVT::v1f64) {
3571        SelectStoreLane(Node, 2, AArch64::ST2i64);
3572        return;
3573      }
3574      break;
3575    }
3576    case Intrinsic::aarch64_neon_st3lane: {
3577      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3578        SelectStoreLane(Node, 3, AArch64::ST3i8);
3579        return;
3580      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3581                 VT == MVT::v8f16) {
3582        SelectStoreLane(Node, 3, AArch64::ST3i16);
3583        return;
3584      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3585                 VT == MVT::v2f32) {
3586        SelectStoreLane(Node, 3, AArch64::ST3i32);
3587        return;
3588      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3589                 VT == MVT::v1f64) {
3590        SelectStoreLane(Node, 3, AArch64::ST3i64);
3591        return;
3592      }
3593      break;
3594    }
3595    case Intrinsic::aarch64_neon_st4lane: {
3596      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3597        SelectStoreLane(Node, 4, AArch64::ST4i8);
3598        return;
3599      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3600                 VT == MVT::v8f16) {
3601        SelectStoreLane(Node, 4, AArch64::ST4i16);
3602        return;
3603      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3604                 VT == MVT::v2f32) {
3605        SelectStoreLane(Node, 4, AArch64::ST4i32);
3606        return;
3607      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3608                 VT == MVT::v1f64) {
3609        SelectStoreLane(Node, 4, AArch64::ST4i64);
3610        return;
3611      }
3612      break;
3613    }
3614    }
3615    break;
3616  }
3617  case AArch64ISD::LD2post: {
3618    if (VT == MVT::v8i8) {
3619      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3620      return;
3621    } else if (VT == MVT::v16i8) {
3622      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3623      return;
3624    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3625      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3626      return;
3627    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3628      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3629      return;
3630    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3631      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3632      return;
3633    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3634      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3635      return;
3636    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3637      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3638      return;
3639    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3640      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3641      return;
3642    }
3643    break;
3644  }
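  // Note: the *_POST opcodes in this and the following cases also produce the
  // post-incremented base register; SelectPostLoad wires up that write-back
  // result alongside the extracted sub-registers.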
3645  case AArch64ISD::LD3post: {
3646    if (VT == MVT::v8i8) {
3647      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3648      return;
3649    } else if (VT == MVT::v16i8) {
3650      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3651      return;
3652    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3653      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3654      return;
3655    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3656      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3657      return;
3658    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3659      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3660      return;
3661    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3662      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3663      return;
3664    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3665      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3666      return;
3667    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3668      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3669      return;
3670    }
3671    break;
3672  }
3673  case AArch64ISD::LD4post: {
3674    if (VT == MVT::v8i8) {
3675      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3676      return;
3677    } else if (VT == MVT::v16i8) {
3678      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3679      return;
3680    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3681      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3682      return;
3683    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3684      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3685      return;
3686    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3687      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3688      return;
3689    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3690      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3691      return;
3692    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3693      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3694      return;
3695    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3696      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3697      return;
3698    }
3699    break;
3700  }
3701  case AArch64ISD::LD1x2post: {
3702    if (VT == MVT::v8i8) {
3703      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3704      return;
3705    } else if (VT == MVT::v16i8) {
3706      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3707      return;
3708    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3709      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3710      return;
3711    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3712      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3713      return;
3714    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3715      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3716      return;
3717    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3718      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3719      return;
3720    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3721      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3722      return;
3723    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3724      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3725      return;
3726    }
3727    break;
3728  }
3729  case AArch64ISD::LD1x3post: {
3730    if (VT == MVT::v8i8) {
3731      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3732      return;
3733    } else if (VT == MVT::v16i8) {
3734      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3735      return;
3736    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3737      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3738      return;
3739    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3740      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3741      return;
3742    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3743      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3744      return;
3745    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3746      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3747      return;
3748    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3749      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3750      return;
3751    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3752      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3753      return;
3754    }
3755    break;
3756  }
3757  case AArch64ISD::LD1x4post: {
3758    if (VT == MVT::v8i8) {
3759      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3760      return;
3761    } else if (VT == MVT::v16i8) {
3762      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3763      return;
3764    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3765      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3766      return;
3767    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3768      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3769      return;
3770    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3771      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3772      return;
3773    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3774      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3775      return;
3776    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3777      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3778      return;
3779    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3780      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3781      return;
3782    }
3783    break;
3784  }
3785  case AArch64ISD::LD1DUPpost: {
3786    if (VT == MVT::v8i8) {
3787      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3788      return;
3789    } else if (VT == MVT::v16i8) {
3790      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3791      return;
3792    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3793      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3794      return;
3795    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3796      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3797      return;
3798    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3799      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3800      return;
3801    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3802      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3803      return;
3804    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3805      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3806      return;
3807    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3808      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3809      return;
3810    }
3811    break;
3812  }
3813  case AArch64ISD::LD2DUPpost: {
3814    if (VT == MVT::v8i8) {
3815      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3816      return;
3817    } else if (VT == MVT::v16i8) {
3818      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3819      return;
3820    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3821      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3822      return;
3823    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3824      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3825      return;
3826    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3827      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3828      return;
3829    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3830      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3831      return;
3832    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3833      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3834      return;
3835    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3836      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3837      return;
3838    }
3839    break;
3840  }
3841  case AArch64ISD::LD3DUPpost: {
3842    if (VT == MVT::v8i8) {
3843      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3844      return;
3845    } else if (VT == MVT::v16i8) {
3846      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3847      return;
3848    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3849      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3850      return;
3851    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3852      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3853      return;
3854    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3855      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3856      return;
3857    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3858      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3859      return;
3860    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3861      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3862      return;
3863    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3864      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3865      return;
3866    }
3867    break;
3868  }
3869  case AArch64ISD::LD4DUPpost: {
3870    if (VT == MVT::v8i8) {
3871      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3872      return;
3873    } else if (VT == MVT::v16i8) {
3874      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3875      return;
3876    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3877      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3878      return;
3879    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3880      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3881      return;
3882    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3883      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3884      return;
3885    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3886      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3887      return;
3888    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3889      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3890      return;
3891    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3892      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3893      return;
3894    }
3895    break;
3896  }
3897  case AArch64ISD::LD1LANEpost: {
3898    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3899      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3900      return;
3901    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3902               VT == MVT::v8f16) {
3903      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3904      return;
3905    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3906               VT == MVT::v2f32) {
3907      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3908      return;
3909    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3910               VT == MVT::v1f64) {
3911      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3912      return;
3913    }
3914    break;
3915  }
3916  case AArch64ISD::LD2LANEpost: {
3917    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3918      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3919      return;
3920    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3921               VT == MVT::v8f16) {
3922      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3923      return;
3924    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3925               VT == MVT::v2f32) {
3926      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3927      return;
3928    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3929               VT == MVT::v1f64) {
3930      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3931      return;
3932    }
3933    break;
3934  }
3935  case AArch64ISD::LD3LANEpost: {
3936    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3937      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3938      return;
3939    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3940               VT == MVT::v8f16) {
3941      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3942      return;
3943    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3944               VT == MVT::v2f32) {
3945      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3946      return;
3947    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3948               VT == MVT::v1f64) {
3949      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3950      return;
3951    }
3952    break;
3953  }
3954  case AArch64ISD::LD4LANEpost: {
3955    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3956      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3957      return;
3958    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3959               VT == MVT::v8f16) {
3960      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3961      return;
3962    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3963               VT == MVT::v2f32) {
3964      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3965      return;
3966    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3967               VT == MVT::v1f64) {
3968      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3969      return;
3970    }
3971    break;
3972  }
3973  case AArch64ISD::ST2post: {
3974    VT = Node->getOperand(1).getValueType();
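    // Note: post-indexed store nodes yield the updated base address rather
    // than a vector, so VT is re-derived from the stored value (operand 1;
    // operand 0 is the chain). The ST*post cases below do the same.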
3975    if (VT == MVT::v8i8) {
3976      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3977      return;
3978    } else if (VT == MVT::v16i8) {
3979      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3980      return;
3981    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3982      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3983      return;
3984    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3985      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3986      return;
3987    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3988      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3989      return;
3990    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3991      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3992      return;
3993    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3994      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3995      return;
3996    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3997      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3998      return;
3999    }
4000    break;
4001  }
4002  case AArch64ISD::ST3post: {
4003    VT = Node->getOperand(1).getValueType();
4004    if (VT == MVT::v8i8) {
4005      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4006      return;
4007    } else if (VT == MVT::v16i8) {
4008      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4009      return;
4010    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4011      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4012      return;
4013    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4014      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4015      return;
4016    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4017      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4018      return;
4019    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4020      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4021      return;
4022    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4023      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4024      return;
4025    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4026      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4027      return;
4028    }
4029    break;
4030  }
4031  case AArch64ISD::ST4post: {
4032    VT = Node->getOperand(1).getValueType();
4033    if (VT == MVT::v8i8) {
4034      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4035      return;
4036    } else if (VT == MVT::v16i8) {
4037      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4038      return;
4039    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4040      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4041      return;
4042    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4043      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4044      return;
4045    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4046      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4047      return;
4048    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4049      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4050      return;
4051    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4052      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4053      return;
4054    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4055      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4056      return;
4057    }
4058    break;
4059  }
4060  case AArch64ISD::ST1x2post: {
4061    VT = Node->getOperand(1).getValueType();
4062    if (VT == MVT::v8i8) {
4063      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4064      return;
4065    } else if (VT == MVT::v16i8) {
4066      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4067      return;
4068    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4069      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4070      return;
4071    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4072      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4073      return;
4074    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4075      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4076      return;
4077    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4078      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4079      return;
4080    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4081      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4082      return;
4083    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4084      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4085      return;
4086    }
4087    break;
4088  }
4089  case AArch64ISD::ST1x3post: {
4090    VT = Node->getOperand(1).getValueType();
4091    if (VT == MVT::v8i8) {
4092      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4093      return;
4094    } else if (VT == MVT::v16i8) {
4095      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4096      return;
4097    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4098      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4099      return;
4100    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4101      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4102      return;
4103    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4104      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4105      return;
4106    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4107      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4108      return;
4109    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4110      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4111      return;
4112    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4113      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4114      return;
4115    }
4116    break;
4117  }
4118  case AArch64ISD::ST1x4post: {
4119    VT = Node->getOperand(1).getValueType();
4120    if (VT == MVT::v8i8) {
4121      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4122      return;
4123    } else if (VT == MVT::v16i8) {
4124      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4125      return;
4126    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4127      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4128      return;
4129    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4130      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4131      return;
4132    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4133      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4134      return;
4135    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4136      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4137      return;
4138    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4139      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4140      return;
4141    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4142      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4143      return;
4144    }
4145    break;
4146  }
4147  case AArch64ISD::ST2LANEpost: {
4148    VT = Node->getOperand(1).getValueType();
4149    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4150      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4151      return;
4152    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4153               VT == MVT::v8f16) {
4154      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4155      return;
4156    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4157               VT == MVT::v2f32) {
4158      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4159      return;
4160    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4161               VT == MVT::v1f64) {
4162      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4163      return;
4164    }
4165    break;
4166  }
4167  case AArch64ISD::ST3LANEpost: {
4168    VT = Node->getOperand(1).getValueType();
4169    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4170      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4171      return;
4172    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4173               VT == MVT::v8f16) {
4174      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4175      return;
4176    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4177               VT == MVT::v2f32) {
4178      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4179      return;
4180    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4181               VT == MVT::v1f64) {
4182      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4183      return;
4184    }
4185    break;
4186  }
4187  case AArch64ISD::ST4LANEpost: {
4188    VT = Node->getOperand(1).getValueType();
4189    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4190      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4191      return;
4192    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4193               VT == MVT::v8f16) {
4194      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4195      return;
4196    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4197               VT == MVT::v2f32) {
4198      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4199      return;
4200    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4201               VT == MVT::v1f64) {
4202      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4203      return;
4204    }
4205    break;
4206  }
4207  }
4208
4209  // Select the default instruction
4210  SelectCode(Node);
4211}
4212
4213/// createAArch64ISelDag - This pass converts a legalized DAG into an
4214/// AArch64-specific DAG, ready for instruction scheduling.
4215FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4216                                         CodeGenOpt::Level OptLevel) {
4217  return new AArch64DAGToDAGISel(TM, OptLevel);
4218}
4219