1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64TargetMachine.h"
14#include "MCTargetDesc/AArch64AddressingModes.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/CodeGen/SelectionDAGISel.h"
17#include "llvm/IR/Function.h" // To access function attributes.
18#include "llvm/IR/GlobalValue.h"
19#include "llvm/IR/Intrinsics.h"
20#include "llvm/IR/IntrinsicsAArch64.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/ErrorHandling.h"
23#include "llvm/Support/KnownBits.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/raw_ostream.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "aarch64-isel"
30
31//===--------------------------------------------------------------------===//
32/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33/// instructions for SelectionDAG operations.
34///
35namespace {
36
37class AArch64DAGToDAGISel : public SelectionDAGISel {
38
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42
43public:
44  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
45                               CodeGenOpt::Level OptLevel)
46      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
47
48  StringRef getPassName() const override {
49    return "AArch64 Instruction Selection";
50  }
51
52  bool runOnMachineFunction(MachineFunction &MF) override {
53    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
54    return SelectionDAGISel::runOnMachineFunction(MF);
55  }
56
57  void Select(SDNode *Node) override;
58
59  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
60  /// inline asm expressions.
61  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
62                                    unsigned ConstraintID,
63                                    std::vector<SDValue> &OutOps) override;
64
65  template <signed Low, signed High, signed Scale>
66  bool SelectRDVLImm(SDValue N, SDValue &Imm);
67
68  bool tryMLAV64LaneV128(SDNode *N);
69  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
70  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74    return SelectShiftedRegister(N, false, Reg, Shift);
75  }
76  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77    return SelectShiftedRegister(N, true, Reg, Shift);
78  }
79  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81  }
82  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84  }
85  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87  }
88  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90  }
91  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93  }
94  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96  }
97  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99  }
100  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101    return SelectAddrModeIndexed(N, 1, Base, OffImm);
102  }
103  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104    return SelectAddrModeIndexed(N, 2, Base, OffImm);
105  }
106  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107    return SelectAddrModeIndexed(N, 4, Base, OffImm);
108  }
109  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110    return SelectAddrModeIndexed(N, 8, Base, OffImm);
111  }
112  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113    return SelectAddrModeIndexed(N, 16, Base, OffImm);
114  }
115  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117  }
118  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120  }
121  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123  }
124  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126  }
127  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129  }
130
131  template<int Width>
132  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
133                         SDValue &SignExtend, SDValue &DoShift) {
134    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
135  }
136
137  template<int Width>
138  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
139                         SDValue &SignExtend, SDValue &DoShift) {
140    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
141  }
142
143  bool SelectDupZeroOrUndef(SDValue N) {
144    switch(N->getOpcode()) {
145    case ISD::UNDEF:
146      return true;
147    case AArch64ISD::DUP:
148    case ISD::SPLAT_VECTOR: {
149      auto Opnd0 = N->getOperand(0);
150      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
151        if (CN->isNullValue())
152          return true;
153      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
154        if (CN->isZero())
155          return true;
156      break;
157    }
158    default:
159      break;
160    }
161
162    return false;
163  }
164
165  bool SelectDupZero(SDValue N) {
166    switch(N->getOpcode()) {
167    case AArch64ISD::DUP:
168    case ISD::SPLAT_VECTOR: {
169      auto Opnd0 = N->getOperand(0);
170      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
171        if (CN->isNullValue())
172          return true;
173      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
174        if (CN->isZero())
175          return true;
176      break;
177    }
178    }
179
180    return false;
181  }
182
183  template<MVT::SimpleValueType VT>
184  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
185    return SelectSVEAddSubImm(N, VT, Imm, Shift);
186  }
187
188  template<MVT::SimpleValueType VT>
189  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
190    return SelectSVELogicalImm(N, VT, Imm);
191  }
192
193  template <unsigned Low, unsigned High>
194  bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) {
195    return SelectSVEShiftImm64(N, Low, High, Imm);
196  }
197
198  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
199  template<signed Min, signed Max, signed Scale, bool Shift>
200  bool SelectCntImm(SDValue N, SDValue &Imm) {
201    if (!isa<ConstantSDNode>(N))
202      return false;
203
204    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
205    if (Shift)
206      MulImm = 1LL << MulImm;
207
208    if ((MulImm % std::abs(Scale)) != 0)
209      return false;
210
211    MulImm /= Scale;
212    if ((MulImm >= Min) && (MulImm <= Max)) {
213      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
214      return true;
215    }
216
217    return false;
218  }
219
220  /// Form sequences of consecutive 64/128-bit registers for use in NEON
221  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
222  /// between 1 and 4 elements. If it contains a single element that is returned
223  /// unchanged; otherwise a REG_SEQUENCE value is returned.
224  SDValue createDTuple(ArrayRef<SDValue> Vecs);
225  SDValue createQTuple(ArrayRef<SDValue> Vecs);
226  // Form a sequence of SVE registers for instructions using list of vectors,
227  // e.g. structured loads and stores (ldN, stN).
228  SDValue createZTuple(ArrayRef<SDValue> Vecs);
229
230  /// Generic helper for the createDTuple/createQTuple
231  /// functions. Those should almost always be called instead.
232  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
233                      const unsigned SubRegs[]);
234
235  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
236
237  bool tryIndexedLoad(SDNode *N);
238
239  bool trySelectStackSlotTagP(SDNode *N);
240  void SelectTagP(SDNode *N);
241
242  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
243                     unsigned SubRegIdx);
244  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
245                         unsigned SubRegIdx);
246  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
247  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
248  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
249                            unsigned Opc_rr, unsigned Opc_ri);
250
251  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
252  /// SVE Reg+Imm addressing mode.
253  template <int64_t Min, int64_t Max>
254  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
255                                SDValue &OffImm);
256  /// SVE Reg+Reg address mode.
257  template <unsigned Scale>
258  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
259    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
260  }
261
262  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
263  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
264  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
265  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
266  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
267                             unsigned Opc_rr, unsigned Opc_ri);
268  std::tuple<unsigned, SDValue, SDValue>
269  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
270                           const SDValue &OldBase, const SDValue &OldOffset,
271                           unsigned Scale);
272
273  bool tryBitfieldExtractOp(SDNode *N);
274  bool tryBitfieldExtractOpFromSExt(SDNode *N);
275  bool tryBitfieldInsertOp(SDNode *N);
276  bool tryBitfieldInsertInZeroOp(SDNode *N);
277  bool tryShiftAmountMod(SDNode *N);
278  bool tryHighFPExt(SDNode *N);
279
280  bool tryReadRegister(SDNode *N);
281  bool tryWriteRegister(SDNode *N);
282
283// Include the pieces autogenerated from the target description.
284#include "AArch64GenDAGISel.inc"
285
286private:
287  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
288                             SDValue &Shift);
289  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
290                               SDValue &OffImm) {
291    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
292  }
293  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
294                                     unsigned Size, SDValue &Base,
295                                     SDValue &OffImm);
296  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
297                             SDValue &OffImm);
298  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
299                              SDValue &OffImm);
300  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
301                         SDValue &Offset, SDValue &SignExtend,
302                         SDValue &DoShift);
303  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
304                         SDValue &Offset, SDValue &SignExtend,
305                         SDValue &DoShift);
306  bool isWorthFolding(SDValue V) const;
307  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
308                         SDValue &Offset, SDValue &SignExtend);
309
310  template<unsigned RegWidth>
311  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
312    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
313  }
314
315  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
316
317  bool SelectCMP_SWAP(SDNode *N);
318
319  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
320
321  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
322
323  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
324
325  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
326  bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High,
327                           SDValue &Imm);
328
329  bool SelectSVEArithImm(SDValue N, SDValue &Imm);
330  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
331                               SDValue &Offset);
332};
333} // end anonymous namespace
334
335/// isIntImmediate - This method tests to see if the node is a constant
336/// operand. If so Imm will receive the 32-bit value.
337static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
338  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
339    Imm = C->getZExtValue();
340    return true;
341  }
342  return false;
343}
344
345// isIntImmediate - This method tests to see if a constant operand.
346// If so Imm will receive the value.
347static bool isIntImmediate(SDValue N, uint64_t &Imm) {
348  return isIntImmediate(N.getNode(), Imm);
349}
350
351// isOpcWithIntImmediate - This method tests to see if the node is a specific
352// opcode and that it has a immediate integer right operand.
353// If so Imm will receive the 32 bit value.
354static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
355                                  uint64_t &Imm) {
356  return N->getOpcode() == Opc &&
357         isIntImmediate(N->getOperand(1).getNode(), Imm);
358}
359
360bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
361    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
362  switch(ConstraintID) {
363  default:
364    llvm_unreachable("Unexpected asm memory constraint");
365  case InlineAsm::Constraint_m:
366  case InlineAsm::Constraint_Q:
367    // We need to make sure that this one operand does not end up in XZR, thus
368    // require the address to be in a PointerRegClass register.
369    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
370    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
371    SDLoc dl(Op);
372    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
373    SDValue NewOp =
374        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
375                                       dl, Op.getValueType(),
376                                       Op, RC), 0);
377    OutOps.push_back(NewOp);
378    return false;
379  }
380  return true;
381}
382
383/// SelectArithImmed - Select an immediate value that can be represented as
384/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
385/// Val set to the 12-bit value and Shift set to the shifter operand.
386bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
387                                           SDValue &Shift) {
388  // This function is called from the addsub_shifted_imm ComplexPattern,
389  // which lists [imm] as the list of opcode it's interested in, however
390  // we still need to check whether the operand is actually an immediate
391  // here because the ComplexPattern opcode list is only used in
392  // root-level opcode matching.
393  if (!isa<ConstantSDNode>(N.getNode()))
394    return false;
395
396  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
397  unsigned ShiftAmt;
398
399  if (Immed >> 12 == 0) {
400    ShiftAmt = 0;
401  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
402    ShiftAmt = 12;
403    Immed = Immed >> 12;
404  } else
405    return false;
406
407  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
408  SDLoc dl(N);
409  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
410  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
411  return true;
412}
413
414/// SelectNegArithImmed - As above, but negates the value before trying to
415/// select it.
416bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
417                                              SDValue &Shift) {
418  // This function is called from the addsub_shifted_imm ComplexPattern,
419  // which lists [imm] as the list of opcode it's interested in, however
420  // we still need to check whether the operand is actually an immediate
421  // here because the ComplexPattern opcode list is only used in
422  // root-level opcode matching.
423  if (!isa<ConstantSDNode>(N.getNode()))
424    return false;
425
426  // The immediate operand must be a 24-bit zero-extended immediate.
427  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
428
429  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
430  // have the opposite effect on the C flag, so this pattern mustn't match under
431  // those circumstances.
432  if (Immed == 0)
433    return false;
434
435  if (N.getValueType() == MVT::i32)
436    Immed = ~((uint32_t)Immed) + 1;
437  else
438    Immed = ~Immed + 1ULL;
439  if (Immed & 0xFFFFFFFFFF000000ULL)
440    return false;
441
442  Immed &= 0xFFFFFFULL;
443  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
444                          Shift);
445}
446
447/// getShiftTypeForNode - Translate a shift node to the corresponding
448/// ShiftType value.
449static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
450  switch (N.getOpcode()) {
451  default:
452    return AArch64_AM::InvalidShiftExtend;
453  case ISD::SHL:
454    return AArch64_AM::LSL;
455  case ISD::SRL:
456    return AArch64_AM::LSR;
457  case ISD::SRA:
458    return AArch64_AM::ASR;
459  case ISD::ROTR:
460    return AArch64_AM::ROR;
461  }
462}
463
464/// Determine whether it is worth it to fold SHL into the addressing
465/// mode.
466static bool isWorthFoldingSHL(SDValue V) {
467  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
468  // It is worth folding logical shift of up to three places.
469  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
470  if (!CSD)
471    return false;
472  unsigned ShiftVal = CSD->getZExtValue();
473  if (ShiftVal > 3)
474    return false;
475
476  // Check if this particular node is reused in any non-memory related
477  // operation.  If yes, do not try to fold this node into the address
478  // computation, since the computation will be kept.
479  const SDNode *Node = V.getNode();
480  for (SDNode *UI : Node->uses())
481    if (!isa<MemSDNode>(*UI))
482      for (SDNode *UII : UI->uses())
483        if (!isa<MemSDNode>(*UII))
484          return false;
485  return true;
486}
487
488/// Determine whether it is worth to fold V into an extended register.
489bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
490  // Trivial if we are optimizing for code size or if there is only
491  // one use of the value.
492  if (CurDAG->shouldOptForSize() || V.hasOneUse())
493    return true;
494  // If a subtarget has a fastpath LSL we can fold a logical shift into
495  // the addressing mode and save a cycle.
496  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
497      isWorthFoldingSHL(V))
498    return true;
499  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
500    const SDValue LHS = V.getOperand(0);
501    const SDValue RHS = V.getOperand(1);
502    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
503      return true;
504    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
505      return true;
506  }
507
508  // It hurts otherwise, since the value will be reused.
509  return false;
510}
511
512/// SelectShiftedRegister - Select a "shifted register" operand.  If the value
513/// is not shifted, set the Shift operand to default of "LSL 0".  The logical
514/// instructions allow the shifted register to be rotated, but the arithmetic
515/// instructions do not.  The AllowROR parameter specifies whether ROR is
516/// supported.
517bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
518                                                SDValue &Reg, SDValue &Shift) {
519  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
520  if (ShType == AArch64_AM::InvalidShiftExtend)
521    return false;
522  if (!AllowROR && ShType == AArch64_AM::ROR)
523    return false;
524
525  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
526    unsigned BitSize = N.getValueSizeInBits();
527    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
528    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
529
530    Reg = N.getOperand(0);
531    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
532    return isWorthFolding(N);
533  }
534
535  return false;
536}
537
538/// getExtendTypeForNode - Translate an extend node to the corresponding
539/// ExtendType value.
540static AArch64_AM::ShiftExtendType
541getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
542  if (N.getOpcode() == ISD::SIGN_EXTEND ||
543      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
544    EVT SrcVT;
545    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
546      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
547    else
548      SrcVT = N.getOperand(0).getValueType();
549
550    if (!IsLoadStore && SrcVT == MVT::i8)
551      return AArch64_AM::SXTB;
552    else if (!IsLoadStore && SrcVT == MVT::i16)
553      return AArch64_AM::SXTH;
554    else if (SrcVT == MVT::i32)
555      return AArch64_AM::SXTW;
556    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
557
558    return AArch64_AM::InvalidShiftExtend;
559  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
560             N.getOpcode() == ISD::ANY_EXTEND) {
561    EVT SrcVT = N.getOperand(0).getValueType();
562    if (!IsLoadStore && SrcVT == MVT::i8)
563      return AArch64_AM::UXTB;
564    else if (!IsLoadStore && SrcVT == MVT::i16)
565      return AArch64_AM::UXTH;
566    else if (SrcVT == MVT::i32)
567      return AArch64_AM::UXTW;
568    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
569
570    return AArch64_AM::InvalidShiftExtend;
571  } else if (N.getOpcode() == ISD::AND) {
572    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
573    if (!CSD)
574      return AArch64_AM::InvalidShiftExtend;
575    uint64_t AndMask = CSD->getZExtValue();
576
577    switch (AndMask) {
578    default:
579      return AArch64_AM::InvalidShiftExtend;
580    case 0xFF:
581      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
582    case 0xFFFF:
583      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
584    case 0xFFFFFFFF:
585      return AArch64_AM::UXTW;
586    }
587  }
588
589  return AArch64_AM::InvalidShiftExtend;
590}
591
592// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
593static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
594  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
595      DL->getOpcode() != AArch64ISD::DUPLANE32)
596    return false;
597
598  SDValue SV = DL->getOperand(0);
599  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
600    return false;
601
602  SDValue EV = SV.getOperand(1);
603  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
604    return false;
605
606  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
607  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
608  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
609  LaneOp = EV.getOperand(0);
610
611  return true;
612}
613
614// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
615// high lane extract.
616static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
617                             SDValue &LaneOp, int &LaneIdx) {
618
619  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
620    std::swap(Op0, Op1);
621    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
622      return false;
623  }
624  StdOp = Op1;
625  return true;
626}
627
628/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
629/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
630/// so that we don't emit unnecessary lane extracts.
631bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
632  SDLoc dl(N);
633  SDValue Op0 = N->getOperand(0);
634  SDValue Op1 = N->getOperand(1);
635  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
636  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
637  int LaneIdx = -1; // Will hold the lane index.
638
639  if (Op1.getOpcode() != ISD::MUL ||
640      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
641                        LaneIdx)) {
642    std::swap(Op0, Op1);
643    if (Op1.getOpcode() != ISD::MUL ||
644        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
645                          LaneIdx))
646      return false;
647  }
648
649  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
650
651  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
652
653  unsigned MLAOpc = ~0U;
654
655  switch (N->getSimpleValueType(0).SimpleTy) {
656  default:
657    llvm_unreachable("Unrecognized MLA.");
658  case MVT::v4i16:
659    MLAOpc = AArch64::MLAv4i16_indexed;
660    break;
661  case MVT::v8i16:
662    MLAOpc = AArch64::MLAv8i16_indexed;
663    break;
664  case MVT::v2i32:
665    MLAOpc = AArch64::MLAv2i32_indexed;
666    break;
667  case MVT::v4i32:
668    MLAOpc = AArch64::MLAv4i32_indexed;
669    break;
670  }
671
672  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
673  return true;
674}
675
676bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
677  SDLoc dl(N);
678  SDValue SMULLOp0;
679  SDValue SMULLOp1;
680  int LaneIdx;
681
682  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
683                        LaneIdx))
684    return false;
685
686  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
687
688  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
689
690  unsigned SMULLOpc = ~0U;
691
692  if (IntNo == Intrinsic::aarch64_neon_smull) {
693    switch (N->getSimpleValueType(0).SimpleTy) {
694    default:
695      llvm_unreachable("Unrecognized SMULL.");
696    case MVT::v4i32:
697      SMULLOpc = AArch64::SMULLv4i16_indexed;
698      break;
699    case MVT::v2i64:
700      SMULLOpc = AArch64::SMULLv2i32_indexed;
701      break;
702    }
703  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
704    switch (N->getSimpleValueType(0).SimpleTy) {
705    default:
706      llvm_unreachable("Unrecognized SMULL.");
707    case MVT::v4i32:
708      SMULLOpc = AArch64::UMULLv4i16_indexed;
709      break;
710    case MVT::v2i64:
711      SMULLOpc = AArch64::UMULLv2i32_indexed;
712      break;
713    }
714  } else
715    llvm_unreachable("Unrecognized intrinsic.");
716
717  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
718  return true;
719}
720
721/// Instructions that accept extend modifiers like UXTW expect the register
722/// being extended to be a GPR32, but the incoming DAG might be acting on a
723/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
724/// this is the case.
725static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
726  if (N.getValueType() == MVT::i32)
727    return N;
728
729  SDLoc dl(N);
730  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
731  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
732                                               dl, MVT::i32, N, SubReg);
733  return SDValue(Node, 0);
734}
735
736// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
737template<signed Low, signed High, signed Scale>
738bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
739  if (!isa<ConstantSDNode>(N))
740    return false;
741
742  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
743  if ((MulImm % std::abs(Scale)) == 0) {
744    int64_t RDVLImm = MulImm / Scale;
745    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
746      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
747      return true;
748    }
749  }
750
751  return false;
752}
753
754/// SelectArithExtendedRegister - Select a "extended register" operand.  This
755/// operand folds in an extend followed by an optional left shift.
756bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
757                                                      SDValue &Shift) {
758  unsigned ShiftVal = 0;
759  AArch64_AM::ShiftExtendType Ext;
760
761  if (N.getOpcode() == ISD::SHL) {
762    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
763    if (!CSD)
764      return false;
765    ShiftVal = CSD->getZExtValue();
766    if (ShiftVal > 4)
767      return false;
768
769    Ext = getExtendTypeForNode(N.getOperand(0));
770    if (Ext == AArch64_AM::InvalidShiftExtend)
771      return false;
772
773    Reg = N.getOperand(0).getOperand(0);
774  } else {
775    Ext = getExtendTypeForNode(N);
776    if (Ext == AArch64_AM::InvalidShiftExtend)
777      return false;
778
779    Reg = N.getOperand(0);
780
781    // Don't match if free 32-bit -> 64-bit zext can be used instead.
782    if (Ext == AArch64_AM::UXTW &&
783        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
784      return false;
785  }
786
787  // AArch64 mandates that the RHS of the operation must use the smallest
788  // register class that could contain the size being extended from.  Thus,
789  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
790  // there might not be an actual 32-bit value in the program.  We can
791  // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
792  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
793  Reg = narrowIfNeeded(CurDAG, Reg);
794  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
795                                    MVT::i32);
796  return isWorthFolding(N);
797}
798
799/// If there's a use of this ADDlow that's not itself a load/store then we'll
800/// need to create a real ADD instruction from it anyway and there's no point in
801/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
802/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
803/// leads to duplicated ADRP instructions.
804static bool isWorthFoldingADDlow(SDValue N) {
805  for (auto Use : N->uses()) {
806    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
807        Use->getOpcode() != ISD::ATOMIC_LOAD &&
808        Use->getOpcode() != ISD::ATOMIC_STORE)
809      return false;
810
811    // ldar and stlr have much more restrictive addressing modes (just a
812    // register).
813    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
814      return false;
815  }
816
817  return true;
818}
819
820/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
821/// immediate" address.  The "Size" argument is the size in bytes of the memory
822/// reference, which determines the scale.
823bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
824                                                        unsigned BW, unsigned Size,
825                                                        SDValue &Base,
826                                                        SDValue &OffImm) {
827  SDLoc dl(N);
828  const DataLayout &DL = CurDAG->getDataLayout();
829  const TargetLowering *TLI = getTargetLowering();
830  if (N.getOpcode() == ISD::FrameIndex) {
831    int FI = cast<FrameIndexSDNode>(N)->getIndex();
832    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
833    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
834    return true;
835  }
836
837  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
838  // selected here doesn't support labels/immediates, only base+offset.
839  if (CurDAG->isBaseWithConstantOffset(N)) {
840    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
841      if (IsSignedImm) {
842        int64_t RHSC = RHS->getSExtValue();
843        unsigned Scale = Log2_32(Size);
844        int64_t Range = 0x1LL << (BW - 1);
845
846        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
847            RHSC < (Range << Scale)) {
848          Base = N.getOperand(0);
849          if (Base.getOpcode() == ISD::FrameIndex) {
850            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
851            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
852          }
853          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
854          return true;
855        }
856      } else {
857        // unsigned Immediate
858        uint64_t RHSC = RHS->getZExtValue();
859        unsigned Scale = Log2_32(Size);
860        uint64_t Range = 0x1ULL << BW;
861
862        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
863          Base = N.getOperand(0);
864          if (Base.getOpcode() == ISD::FrameIndex) {
865            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
866            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
867          }
868          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
869          return true;
870        }
871      }
872    }
873  }
874  // Base only. The address will be materialized into a register before
875  // the memory is accessed.
876  //    add x0, Xbase, #offset
877  //    stp x1, x2, [x0]
878  Base = N;
879  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
880  return true;
881}
882
883/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
884/// immediate" address.  The "Size" argument is the size in bytes of the memory
885/// reference, which determines the scale.
886bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
887                                              SDValue &Base, SDValue &OffImm) {
888  SDLoc dl(N);
889  const DataLayout &DL = CurDAG->getDataLayout();
890  const TargetLowering *TLI = getTargetLowering();
891  if (N.getOpcode() == ISD::FrameIndex) {
892    int FI = cast<FrameIndexSDNode>(N)->getIndex();
893    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
894    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
895    return true;
896  }
897
898  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
899    GlobalAddressSDNode *GAN =
900        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
901    Base = N.getOperand(0);
902    OffImm = N.getOperand(1);
903    if (!GAN)
904      return true;
905
906    if (GAN->getOffset() % Size == 0 &&
907        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
908      return true;
909  }
910
911  if (CurDAG->isBaseWithConstantOffset(N)) {
912    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
913      int64_t RHSC = (int64_t)RHS->getZExtValue();
914      unsigned Scale = Log2_32(Size);
915      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
916        Base = N.getOperand(0);
917        if (Base.getOpcode() == ISD::FrameIndex) {
918          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
919          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
920        }
921        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
922        return true;
923      }
924    }
925  }
926
927  // Before falling back to our general case, check if the unscaled
928  // instructions can handle this. If so, that's preferable.
929  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
930    return false;
931
932  // Base only. The address will be materialized into a register before
933  // the memory is accessed.
934  //    add x0, Xbase, #offset
935  //    ldr x0, [x0]
936  Base = N;
937  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
938  return true;
939}
940
941/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
942/// immediate" address.  This should only match when there is an offset that
943/// is not valid for a scaled immediate addressing mode.  The "Size" argument
944/// is the size in bytes of the memory reference, which is needed here to know
945/// what is valid for a scaled immediate.
946bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
947                                                 SDValue &Base,
948                                                 SDValue &OffImm) {
949  if (!CurDAG->isBaseWithConstantOffset(N))
950    return false;
951  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
952    int64_t RHSC = RHS->getSExtValue();
953    // If the offset is valid as a scaled immediate, don't match here.
954    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
955        RHSC < (0x1000 << Log2_32(Size)))
956      return false;
957    if (RHSC >= -256 && RHSC < 256) {
958      Base = N.getOperand(0);
959      if (Base.getOpcode() == ISD::FrameIndex) {
960        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
961        const TargetLowering *TLI = getTargetLowering();
962        Base = CurDAG->getTargetFrameIndex(
963            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
964      }
965      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
966      return true;
967    }
968  }
969  return false;
970}
971
972static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
973  SDLoc dl(N);
974  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
975  SDValue ImpDef = SDValue(
976      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
977  MachineSDNode *Node = CurDAG->getMachineNode(
978      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
979  return SDValue(Node, 0);
980}
981
982/// Check if the given SHL node (\p N), can be used to form an
983/// extended register for an addressing mode.
984bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
985                                            bool WantExtend, SDValue &Offset,
986                                            SDValue &SignExtend) {
987  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
988  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
989  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
990    return false;
991
992  SDLoc dl(N);
993  if (WantExtend) {
994    AArch64_AM::ShiftExtendType Ext =
995        getExtendTypeForNode(N.getOperand(0), true);
996    if (Ext == AArch64_AM::InvalidShiftExtend)
997      return false;
998
999    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1000    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1001                                           MVT::i32);
1002  } else {
1003    Offset = N.getOperand(0);
1004    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1005  }
1006
1007  unsigned LegalShiftVal = Log2_32(Size);
1008  unsigned ShiftVal = CSD->getZExtValue();
1009
1010  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1011    return false;
1012
1013  return isWorthFolding(N);
1014}
1015
1016bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1017                                            SDValue &Base, SDValue &Offset,
1018                                            SDValue &SignExtend,
1019                                            SDValue &DoShift) {
1020  if (N.getOpcode() != ISD::ADD)
1021    return false;
1022  SDValue LHS = N.getOperand(0);
1023  SDValue RHS = N.getOperand(1);
1024  SDLoc dl(N);
1025
1026  // We don't want to match immediate adds here, because they are better lowered
1027  // to the register-immediate addressing modes.
1028  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1029    return false;
1030
1031  // Check if this particular node is reused in any non-memory related
1032  // operation.  If yes, do not try to fold this node into the address
1033  // computation, since the computation will be kept.
1034  const SDNode *Node = N.getNode();
1035  for (SDNode *UI : Node->uses()) {
1036    if (!isa<MemSDNode>(*UI))
1037      return false;
1038  }
1039
1040  // Remember if it is worth folding N when it produces extended register.
1041  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1042
1043  // Try to match a shifted extend on the RHS.
1044  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1045      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1046    Base = LHS;
1047    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1048    return true;
1049  }
1050
1051  // Try to match a shifted extend on the LHS.
1052  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1053      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1054    Base = RHS;
1055    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1056    return true;
1057  }
1058
1059  // There was no shift, whatever else we find.
1060  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1061
1062  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1063  // Try to match an unshifted extend on the LHS.
1064  if (IsExtendedRegisterWorthFolding &&
1065      (Ext = getExtendTypeForNode(LHS, true)) !=
1066          AArch64_AM::InvalidShiftExtend) {
1067    Base = RHS;
1068    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1069    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1070                                           MVT::i32);
1071    if (isWorthFolding(LHS))
1072      return true;
1073  }
1074
1075  // Try to match an unshifted extend on the RHS.
1076  if (IsExtendedRegisterWorthFolding &&
1077      (Ext = getExtendTypeForNode(RHS, true)) !=
1078          AArch64_AM::InvalidShiftExtend) {
1079    Base = LHS;
1080    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1081    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1082                                           MVT::i32);
1083    if (isWorthFolding(RHS))
1084      return true;
1085  }
1086
1087  return false;
1088}
1089
1090// Check if the given immediate is preferred by ADD. If an immediate can be
1091// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1092// encoded by one MOVZ, return true.
1093static bool isPreferredADD(int64_t ImmOff) {
1094  // Constant in [0x0, 0xfff] can be encoded in ADD.
1095  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1096    return true;
1097  // Check if it can be encoded in an "ADD LSL #12".
1098  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1099    // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1100    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1101           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1102  return false;
1103}
1104
1105bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1106                                            SDValue &Base, SDValue &Offset,
1107                                            SDValue &SignExtend,
1108                                            SDValue &DoShift) {
1109  if (N.getOpcode() != ISD::ADD)
1110    return false;
1111  SDValue LHS = N.getOperand(0);
1112  SDValue RHS = N.getOperand(1);
1113  SDLoc DL(N);
1114
1115  // Check if this particular node is reused in any non-memory related
1116  // operation.  If yes, do not try to fold this node into the address
1117  // computation, since the computation will be kept.
1118  const SDNode *Node = N.getNode();
1119  for (SDNode *UI : Node->uses()) {
1120    if (!isa<MemSDNode>(*UI))
1121      return false;
1122  }
1123
1124  // Watch out if RHS is a wide immediate, it can not be selected into
1125  // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1126  // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1127  // instructions like:
1128  //     MOV  X0, WideImmediate
1129  //     ADD  X1, BaseReg, X0
1130  //     LDR  X2, [X1, 0]
1131  // For such situation, using [BaseReg, XReg] addressing mode can save one
1132  // ADD/SUB:
1133  //     MOV  X0, WideImmediate
1134  //     LDR  X2, [BaseReg, X0]
1135  if (isa<ConstantSDNode>(RHS)) {
1136    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1137    unsigned Scale = Log2_32(Size);
1138    // Skip the immediate can be selected by load/store addressing mode.
1139    // Also skip the immediate can be encoded by a single ADD (SUB is also
1140    // checked by using -ImmOff).
1141    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1142        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1143      return false;
1144
1145    SDValue Ops[] = { RHS };
1146    SDNode *MOVI =
1147        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1148    SDValue MOVIV = SDValue(MOVI, 0);
1149    // This ADD of two X register will be selected into [Reg+Reg] mode.
1150    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1151  }
1152
1153  // Remember if it is worth folding N when it produces extended register.
1154  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1155
1156  // Try to match a shifted extend on the RHS.
1157  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1158      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1159    Base = LHS;
1160    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1161    return true;
1162  }
1163
1164  // Try to match a shifted extend on the LHS.
1165  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1166      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1167    Base = RHS;
1168    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1169    return true;
1170  }
1171
1172  // Match any non-shifted, non-extend, non-immediate add expression.
1173  Base = LHS;
1174  Offset = RHS;
1175  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1176  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1177  // Reg1 + Reg2 is free: no check needed.
1178  return true;
1179}
1180
1181SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1182  static const unsigned RegClassIDs[] = {
1183      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1184  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1185                                     AArch64::dsub2, AArch64::dsub3};
1186
1187  return createTuple(Regs, RegClassIDs, SubRegs);
1188}
1189
1190SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1191  static const unsigned RegClassIDs[] = {
1192      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1193  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1194                                     AArch64::qsub2, AArch64::qsub3};
1195
1196  return createTuple(Regs, RegClassIDs, SubRegs);
1197}
1198
1199SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1200  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1201                                         AArch64::ZPR3RegClassID,
1202                                         AArch64::ZPR4RegClassID};
1203  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1204                                     AArch64::zsub2, AArch64::zsub3};
1205
1206  return createTuple(Regs, RegClassIDs, SubRegs);
1207}
1208
1209SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1210                                         const unsigned RegClassIDs[],
1211                                         const unsigned SubRegs[]) {
1212  // There's no special register-class for a vector-list of 1 element: it's just
1213  // a vector.
1214  if (Regs.size() == 1)
1215    return Regs[0];
1216
1217  assert(Regs.size() >= 2 && Regs.size() <= 4);
1218
1219  SDLoc DL(Regs[0]);
1220
1221  SmallVector<SDValue, 4> Ops;
1222
1223  // First operand of REG_SEQUENCE is the desired RegClass.
1224  Ops.push_back(
1225      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1226
1227  // Then we get pairs of source & subregister-position for the components.
1228  for (unsigned i = 0; i < Regs.size(); ++i) {
1229    Ops.push_back(Regs[i]);
1230    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1231  }
1232
1233  SDNode *N =
1234      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1235  return SDValue(N, 0);
1236}
1237
1238void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1239                                      bool isExt) {
1240  SDLoc dl(N);
1241  EVT VT = N->getValueType(0);
1242
1243  unsigned ExtOff = isExt;
1244
1245  // Form a REG_SEQUENCE to force register allocation.
1246  unsigned Vec0Off = ExtOff + 1;
1247  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1248                               N->op_begin() + Vec0Off + NumVecs);
1249  SDValue RegSeq = createQTuple(Regs);
1250
1251  SmallVector<SDValue, 6> Ops;
1252  if (isExt)
1253    Ops.push_back(N->getOperand(1));
1254  Ops.push_back(RegSeq);
1255  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1256  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1257}
1258
1259bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1260  LoadSDNode *LD = cast<LoadSDNode>(N);
1261  if (LD->isUnindexed())
1262    return false;
1263  EVT VT = LD->getMemoryVT();
1264  EVT DstVT = N->getValueType(0);
1265  ISD::MemIndexedMode AM = LD->getAddressingMode();
1266  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1267
1268  // We're not doing validity checking here. That was done when checking
1269  // if we should mark the load as indexed or not. We're just selecting
1270  // the right instruction.
1271  unsigned Opcode = 0;
1272
1273  ISD::LoadExtType ExtType = LD->getExtensionType();
1274  bool InsertTo64 = false;
1275  if (VT == MVT::i64)
1276    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1277  else if (VT == MVT::i32) {
1278    if (ExtType == ISD::NON_EXTLOAD)
1279      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1280    else if (ExtType == ISD::SEXTLOAD)
1281      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1282    else {
1283      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1284      InsertTo64 = true;
1285      // The result of the load is only i32. It's the subreg_to_reg that makes
1286      // it into an i64.
1287      DstVT = MVT::i32;
1288    }
1289  } else if (VT == MVT::i16) {
1290    if (ExtType == ISD::SEXTLOAD) {
1291      if (DstVT == MVT::i64)
1292        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1293      else
1294        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1295    } else {
1296      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1297      InsertTo64 = DstVT == MVT::i64;
1298      // The result of the load is only i32. It's the subreg_to_reg that makes
1299      // it into an i64.
1300      DstVT = MVT::i32;
1301    }
1302  } else if (VT == MVT::i8) {
1303    if (ExtType == ISD::SEXTLOAD) {
1304      if (DstVT == MVT::i64)
1305        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1306      else
1307        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1308    } else {
1309      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1310      InsertTo64 = DstVT == MVT::i64;
1311      // The result of the load is only i32. It's the subreg_to_reg that makes
1312      // it into an i64.
1313      DstVT = MVT::i32;
1314    }
1315  } else if (VT == MVT::f16) {
1316    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1317  } else if (VT == MVT::bf16) {
1318    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1319  } else if (VT == MVT::f32) {
1320    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1321  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1322    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1323  } else if (VT.is128BitVector()) {
1324    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1325  } else
1326    return false;
1327  SDValue Chain = LD->getChain();
1328  SDValue Base = LD->getBasePtr();
1329  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1330  int OffsetVal = (int)OffsetOp->getZExtValue();
1331  SDLoc dl(N);
1332  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1333  SDValue Ops[] = { Base, Offset, Chain };
1334  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1335                                       MVT::Other, Ops);
1336  // Either way, we're replacing the node, so tell the caller that.
1337  SDValue LoadedVal = SDValue(Res, 1);
1338  if (InsertTo64) {
1339    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1340    LoadedVal =
1341        SDValue(CurDAG->getMachineNode(
1342                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
1343                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1344                    SubReg),
1345                0);
1346  }
1347
1348  ReplaceUses(SDValue(N, 0), LoadedVal);
1349  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1350  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1351  CurDAG->RemoveDeadNode(N);
1352  return true;
1353}
1354
1355void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1356                                     unsigned SubRegIdx) {
1357  SDLoc dl(N);
1358  EVT VT = N->getValueType(0);
1359  SDValue Chain = N->getOperand(0);
1360
1361  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1362                   Chain};
1363
1364  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1365
1366  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1367  SDValue SuperReg = SDValue(Ld, 0);
1368  for (unsigned i = 0; i < NumVecs; ++i)
1369    ReplaceUses(SDValue(N, i),
1370        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1371
1372  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1373
1374  // Transfer memoperands.
1375  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1376  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1377
1378  CurDAG->RemoveDeadNode(N);
1379}
1380
1381void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1382                                         unsigned Opc, unsigned SubRegIdx) {
1383  SDLoc dl(N);
1384  EVT VT = N->getValueType(0);
1385  SDValue Chain = N->getOperand(0);
1386
1387  SDValue Ops[] = {N->getOperand(1), // Mem operand
1388                   N->getOperand(2), // Incremental
1389                   Chain};
1390
1391  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1392                        MVT::Untyped, MVT::Other};
1393
1394  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1395
1396  // Update uses of write back register
1397  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1398
1399  // Update uses of vector list
1400  SDValue SuperReg = SDValue(Ld, 1);
1401  if (NumVecs == 1)
1402    ReplaceUses(SDValue(N, 0), SuperReg);
1403  else
1404    for (unsigned i = 0; i < NumVecs; ++i)
1405      ReplaceUses(SDValue(N, i),
1406          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1407
1408  // Update the chain
1409  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1410  CurDAG->RemoveDeadNode(N);
1411}
1412
1413/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1414/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1415/// new Base and an SDValue representing the new offset.
1416std::tuple<unsigned, SDValue, SDValue>
1417AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1418                                              unsigned Opc_ri,
1419                                              const SDValue &OldBase,
1420                                              const SDValue &OldOffset,
1421                                              unsigned Scale) {
1422  SDValue NewBase = OldBase;
1423  SDValue NewOffset = OldOffset;
1424  // Detect a possible Reg+Imm addressing mode.
1425  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1426      N, OldBase, NewBase, NewOffset);
1427
1428  // Detect a possible reg+reg addressing mode, but only if we haven't already
1429  // detected a Reg+Imm one.
1430  const bool IsRegReg =
1431      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1432
1433  // Select the instruction.
1434  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1435}
1436
1437void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1438                                               unsigned Scale, unsigned Opc_ri,
1439                                               unsigned Opc_rr) {
1440  assert(Scale < 4 && "Invalid scaling value.");
1441  SDLoc DL(N);
1442  EVT VT = N->getValueType(0);
1443  SDValue Chain = N->getOperand(0);
1444
1445  // Optimize addressing mode.
1446  SDValue Base, Offset;
1447  unsigned Opc;
1448  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1449      N, Opc_rr, Opc_ri, N->getOperand(2),
1450      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1451
1452  SDValue Ops[] = {N->getOperand(1), // Predicate
1453                   Base,             // Memory operand
1454                   Offset, Chain};
1455
1456  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1457
1458  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1459  SDValue SuperReg = SDValue(Load, 0);
1460  for (unsigned i = 0; i < NumVecs; ++i)
1461    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1462                                   AArch64::zsub0 + i, DL, VT, SuperReg));
1463
1464  // Copy chain
1465  unsigned ChainIdx = NumVecs;
1466  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1467  CurDAG->RemoveDeadNode(N);
1468}
1469
1470void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1471                                      unsigned Opc) {
1472  SDLoc dl(N);
1473  EVT VT = N->getOperand(2)->getValueType(0);
1474
1475  // Form a REG_SEQUENCE to force register allocation.
1476  bool Is128Bit = VT.getSizeInBits() == 128;
1477  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1478  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1479
1480  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1481  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1482
1483  // Transfer memoperands.
1484  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1485  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1486
1487  ReplaceNode(N, St);
1488}
1489
1490void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
1491                                                unsigned Scale, unsigned Opc_rr,
1492                                                unsigned Opc_ri) {
1493  SDLoc dl(N);
1494
1495  // Form a REG_SEQUENCE to force register allocation.
1496  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1497  SDValue RegSeq = createZTuple(Regs);
1498
1499  // Optimize addressing mode.
1500  unsigned Opc;
1501  SDValue Offset, Base;
1502  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1503      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
1504      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
1505
1506  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
1507                   Base,                               // address
1508                   Offset,                             // offset
1509                   N->getOperand(0)};                  // chain
1510  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1511
1512  ReplaceNode(N, St);
1513}
1514
1515bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
1516                                                      SDValue &OffImm) {
1517  SDLoc dl(N);
1518  const DataLayout &DL = CurDAG->getDataLayout();
1519  const TargetLowering *TLI = getTargetLowering();
1520
1521  // Try to match it for the frame address
1522  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
1523    int FI = FINode->getIndex();
1524    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1525    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1526    return true;
1527  }
1528
1529  return false;
1530}
1531
1532void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1533                                          unsigned Opc) {
1534  SDLoc dl(N);
1535  EVT VT = N->getOperand(2)->getValueType(0);
1536  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
1537                        MVT::Other}; // Type for the Chain
1538
1539  // Form a REG_SEQUENCE to force register allocation.
1540  bool Is128Bit = VT.getSizeInBits() == 128;
1541  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1542  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1543
1544  SDValue Ops[] = {RegSeq,
1545                   N->getOperand(NumVecs + 1), // base register
1546                   N->getOperand(NumVecs + 2), // Incremental
1547                   N->getOperand(0)};          // Chain
1548  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1549
1550  ReplaceNode(N, St);
1551}
1552
1553namespace {
1554/// WidenVector - Given a value in the V64 register class, produce the
1555/// equivalent value in the V128 register class.
1556class WidenVector {
1557  SelectionDAG &DAG;
1558
1559public:
1560  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1561
1562  SDValue operator()(SDValue V64Reg) {
1563    EVT VT = V64Reg.getValueType();
1564    unsigned NarrowSize = VT.getVectorNumElements();
1565    MVT EltTy = VT.getVectorElementType().getSimpleVT();
1566    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1567    SDLoc DL(V64Reg);
1568
1569    SDValue Undef =
1570        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1571    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1572  }
1573};
1574} // namespace
1575
1576/// NarrowVector - Given a value in the V128 register class, produce the
1577/// equivalent value in the V64 register class.
1578static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1579  EVT VT = V128Reg.getValueType();
1580  unsigned WideSize = VT.getVectorNumElements();
1581  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1582  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1583
1584  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1585                                    V128Reg);
1586}
1587
1588void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1589                                         unsigned Opc) {
1590  SDLoc dl(N);
1591  EVT VT = N->getValueType(0);
1592  bool Narrow = VT.getSizeInBits() == 64;
1593
1594  // Form a REG_SEQUENCE to force register allocation.
1595  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1596
1597  if (Narrow)
1598    transform(Regs, Regs.begin(),
1599                   WidenVector(*CurDAG));
1600
1601  SDValue RegSeq = createQTuple(Regs);
1602
1603  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1604
1605  unsigned LaneNo =
1606      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1607
1608  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1609                   N->getOperand(NumVecs + 3), N->getOperand(0)};
1610  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1611  SDValue SuperReg = SDValue(Ld, 0);
1612
1613  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1614  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1615                                    AArch64::qsub2, AArch64::qsub3 };
1616  for (unsigned i = 0; i < NumVecs; ++i) {
1617    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1618    if (Narrow)
1619      NV = NarrowVector(NV, *CurDAG);
1620    ReplaceUses(SDValue(N, i), NV);
1621  }
1622
1623  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1624  CurDAG->RemoveDeadNode(N);
1625}
1626
1627void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1628                                             unsigned Opc) {
1629  SDLoc dl(N);
1630  EVT VT = N->getValueType(0);
1631  bool Narrow = VT.getSizeInBits() == 64;
1632
1633  // Form a REG_SEQUENCE to force register allocation.
1634  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1635
1636  if (Narrow)
1637    transform(Regs, Regs.begin(),
1638                   WidenVector(*CurDAG));
1639
1640  SDValue RegSeq = createQTuple(Regs);
1641
1642  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1643                        RegSeq->getValueType(0), MVT::Other};
1644
1645  unsigned LaneNo =
1646      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1647
1648  SDValue Ops[] = {RegSeq,
1649                   CurDAG->getTargetConstant(LaneNo, dl,
1650                                             MVT::i64),         // Lane Number
1651                   N->getOperand(NumVecs + 2),                  // Base register
1652                   N->getOperand(NumVecs + 3),                  // Incremental
1653                   N->getOperand(0)};
1654  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1655
1656  // Update uses of the write back register
1657  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1658
1659  // Update uses of the vector list
1660  SDValue SuperReg = SDValue(Ld, 1);
1661  if (NumVecs == 1) {
1662    ReplaceUses(SDValue(N, 0),
1663                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1664  } else {
1665    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1666    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1667                                      AArch64::qsub2, AArch64::qsub3 };
1668    for (unsigned i = 0; i < NumVecs; ++i) {
1669      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1670                                                  SuperReg);
1671      if (Narrow)
1672        NV = NarrowVector(NV, *CurDAG);
1673      ReplaceUses(SDValue(N, i), NV);
1674    }
1675  }
1676
1677  // Update the Chain
1678  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1679  CurDAG->RemoveDeadNode(N);
1680}
1681
1682void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1683                                          unsigned Opc) {
1684  SDLoc dl(N);
1685  EVT VT = N->getOperand(2)->getValueType(0);
1686  bool Narrow = VT.getSizeInBits() == 64;
1687
1688  // Form a REG_SEQUENCE to force register allocation.
1689  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1690
1691  if (Narrow)
1692    transform(Regs, Regs.begin(),
1693                   WidenVector(*CurDAG));
1694
1695  SDValue RegSeq = createQTuple(Regs);
1696
1697  unsigned LaneNo =
1698      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1699
1700  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1701                   N->getOperand(NumVecs + 3), N->getOperand(0)};
1702  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1703
1704  // Transfer memoperands.
1705  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1706  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1707
1708  ReplaceNode(N, St);
1709}
1710
1711void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1712                                              unsigned Opc) {
1713  SDLoc dl(N);
1714  EVT VT = N->getOperand(2)->getValueType(0);
1715  bool Narrow = VT.getSizeInBits() == 64;
1716
1717  // Form a REG_SEQUENCE to force register allocation.
1718  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1719
1720  if (Narrow)
1721    transform(Regs, Regs.begin(),
1722                   WidenVector(*CurDAG));
1723
1724  SDValue RegSeq = createQTuple(Regs);
1725
1726  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1727                        MVT::Other};
1728
1729  unsigned LaneNo =
1730      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1731
1732  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1733                   N->getOperand(NumVecs + 2), // Base Register
1734                   N->getOperand(NumVecs + 3), // Incremental
1735                   N->getOperand(0)};
1736  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1737
1738  // Transfer memoperands.
1739  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1740  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1741
1742  ReplaceNode(N, St);
1743}
1744
1745static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1746                                       unsigned &Opc, SDValue &Opd0,
1747                                       unsigned &LSB, unsigned &MSB,
1748                                       unsigned NumberOfIgnoredLowBits,
1749                                       bool BiggerPattern) {
1750  assert(N->getOpcode() == ISD::AND &&
1751         "N must be a AND operation to call this function");
1752
1753  EVT VT = N->getValueType(0);
1754
1755  // Here we can test the type of VT and return false when the type does not
1756  // match, but since it is done prior to that call in the current context
1757  // we turned that into an assert to avoid redundant code.
1758  assert((VT == MVT::i32 || VT == MVT::i64) &&
1759         "Type checking must have been done before calling this function");
1760
1761  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1762  // changed the AND node to a 32-bit mask operation. We'll have to
1763  // undo that as part of the transform here if we want to catch all
1764  // the opportunities.
1765  // Currently the NumberOfIgnoredLowBits argument helps to recover
1766  // form these situations when matching bigger pattern (bitfield insert).
1767
1768  // For unsigned extracts, check for a shift right and mask
1769  uint64_t AndImm = 0;
1770  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1771    return false;
1772
1773  const SDNode *Op0 = N->getOperand(0).getNode();
1774
1775  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1776  // simplified. Try to undo that
1777  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1778
1779  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1780  if (AndImm & (AndImm + 1))
1781    return false;
1782
1783  bool ClampMSB = false;
1784  uint64_t SrlImm = 0;
1785  // Handle the SRL + ANY_EXTEND case.
1786  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1787      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1788    // Extend the incoming operand of the SRL to 64-bit.
1789    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1790    // Make sure to clamp the MSB so that we preserve the semantics of the
1791    // original operations.
1792    ClampMSB = true;
1793  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1794             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1795                                   SrlImm)) {
1796    // If the shift result was truncated, we can still combine them.
1797    Opd0 = Op0->getOperand(0).getOperand(0);
1798
1799    // Use the type of SRL node.
1800    VT = Opd0->getValueType(0);
1801  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1802    Opd0 = Op0->getOperand(0);
1803  } else if (BiggerPattern) {
1804    // Let's pretend a 0 shift right has been performed.
1805    // The resulting code will be at least as good as the original one
1806    // plus it may expose more opportunities for bitfield insert pattern.
1807    // FIXME: Currently we limit this to the bigger pattern, because
1808    // some optimizations expect AND and not UBFM.
1809    Opd0 = N->getOperand(0);
1810  } else
1811    return false;
1812
1813  // Bail out on large immediates. This happens when no proper
1814  // combining/constant folding was performed.
1815  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1816    LLVM_DEBUG(
1817        (dbgs() << N
1818                << ": Found large shift immediate, this should not happen\n"));
1819    return false;
1820  }
1821
1822  LSB = SrlImm;
1823  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1824                                 : countTrailingOnes<uint64_t>(AndImm)) -
1825        1;
1826  if (ClampMSB)
1827    // Since we're moving the extend before the right shift operation, we need
1828    // to clamp the MSB to make sure we don't shift in undefined bits instead of
1829    // the zeros which would get shifted in with the original right shift
1830    // operation.
1831    MSB = MSB > 31 ? 31 : MSB;
1832
1833  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1834  return true;
1835}
1836
1837static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1838                                             SDValue &Opd0, unsigned &Immr,
1839                                             unsigned &Imms) {
1840  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1841
1842  EVT VT = N->getValueType(0);
1843  unsigned BitWidth = VT.getSizeInBits();
1844  assert((VT == MVT::i32 || VT == MVT::i64) &&
1845         "Type checking must have been done before calling this function");
1846
1847  SDValue Op = N->getOperand(0);
1848  if (Op->getOpcode() == ISD::TRUNCATE) {
1849    Op = Op->getOperand(0);
1850    VT = Op->getValueType(0);
1851    BitWidth = VT.getSizeInBits();
1852  }
1853
1854  uint64_t ShiftImm;
1855  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1856      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1857    return false;
1858
1859  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1860  if (ShiftImm + Width > BitWidth)
1861    return false;
1862
1863  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1864  Opd0 = Op.getOperand(0);
1865  Immr = ShiftImm;
1866  Imms = ShiftImm + Width - 1;
1867  return true;
1868}
1869
1870static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1871                                          SDValue &Opd0, unsigned &LSB,
1872                                          unsigned &MSB) {
1873  // We are looking for the following pattern which basically extracts several
1874  // continuous bits from the source value and places it from the LSB of the
1875  // destination value, all other bits of the destination value or set to zero:
1876  //
1877  // Value2 = AND Value, MaskImm
1878  // SRL Value2, ShiftImm
1879  //
1880  // with MaskImm >> ShiftImm to search for the bit width.
1881  //
1882  // This gets selected into a single UBFM:
1883  //
1884  // UBFM Value, ShiftImm, BitWide + SrlImm -1
1885  //
1886
1887  if (N->getOpcode() != ISD::SRL)
1888    return false;
1889
1890  uint64_t AndMask = 0;
1891  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1892    return false;
1893
1894  Opd0 = N->getOperand(0).getOperand(0);
1895
1896  uint64_t SrlImm = 0;
1897  if (!isIntImmediate(N->getOperand(1), SrlImm))
1898    return false;
1899
1900  // Check whether we really have several bits extract here.
1901  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1902  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1903    if (N->getValueType(0) == MVT::i32)
1904      Opc = AArch64::UBFMWri;
1905    else
1906      Opc = AArch64::UBFMXri;
1907
1908    LSB = SrlImm;
1909    MSB = BitWide + SrlImm - 1;
1910    return true;
1911  }
1912
1913  return false;
1914}
1915
1916static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1917                                       unsigned &Immr, unsigned &Imms,
1918                                       bool BiggerPattern) {
1919  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1920         "N must be a SHR/SRA operation to call this function");
1921
1922  EVT VT = N->getValueType(0);
1923
1924  // Here we can test the type of VT and return false when the type does not
1925  // match, but since it is done prior to that call in the current context
1926  // we turned that into an assert to avoid redundant code.
1927  assert((VT == MVT::i32 || VT == MVT::i64) &&
1928         "Type checking must have been done before calling this function");
1929
1930  // Check for AND + SRL doing several bits extract.
1931  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1932    return true;
1933
1934  // We're looking for a shift of a shift.
1935  uint64_t ShlImm = 0;
1936  uint64_t TruncBits = 0;
1937  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1938    Opd0 = N->getOperand(0).getOperand(0);
1939  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1940             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1941    // We are looking for a shift of truncate. Truncate from i64 to i32 could
1942    // be considered as setting high 32 bits as zero. Our strategy here is to
1943    // always generate 64bit UBFM. This consistency will help the CSE pass
1944    // later find more redundancy.
1945    Opd0 = N->getOperand(0).getOperand(0);
1946    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1947    VT = Opd0.getValueType();
1948    assert(VT == MVT::i64 && "the promoted type should be i64");
1949  } else if (BiggerPattern) {
1950    // Let's pretend a 0 shift left has been performed.
1951    // FIXME: Currently we limit this to the bigger pattern case,
1952    // because some optimizations expect AND and not UBFM
1953    Opd0 = N->getOperand(0);
1954  } else
1955    return false;
1956
1957  // Missing combines/constant folding may have left us with strange
1958  // constants.
1959  if (ShlImm >= VT.getSizeInBits()) {
1960    LLVM_DEBUG(
1961        (dbgs() << N
1962                << ": Found large shift immediate, this should not happen\n"));
1963    return false;
1964  }
1965
1966  uint64_t SrlImm = 0;
1967  if (!isIntImmediate(N->getOperand(1), SrlImm))
1968    return false;
1969
1970  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1971         "bad amount in shift node!");
1972  int immr = SrlImm - ShlImm;
1973  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1974  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1975  // SRA requires a signed extraction
1976  if (VT == MVT::i32)
1977    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1978  else
1979    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1980  return true;
1981}
1982
1983bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1984  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1985
1986  EVT VT = N->getValueType(0);
1987  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1988  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1989    return false;
1990
1991  uint64_t ShiftImm;
1992  SDValue Op = N->getOperand(0);
1993  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1994    return false;
1995
1996  SDLoc dl(N);
1997  // Extend the incoming operand of the shift to 64-bits.
1998  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1999  unsigned Immr = ShiftImm;
2000  unsigned Imms = NarrowVT.getSizeInBits() - 1;
2001  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2002                   CurDAG->getTargetConstant(Imms, dl, VT)};
2003  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2004  return true;
2005}
2006
2007/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
2008/// extract of a subvector.
2009bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
2010  assert(N->getOpcode() == ISD::FP_EXTEND);
2011
2012  // There are 2 forms of fcvtl2 - extend to double or extend to float.
2013  SDValue Extract = N->getOperand(0);
2014  EVT VT = N->getValueType(0);
2015  EVT NarrowVT = Extract.getValueType();
2016  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
2017      (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
2018    return false;
2019
2020  // Optionally look past a bitcast.
2021  Extract = peekThroughBitcasts(Extract);
2022  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2023    return false;
2024
2025  // Match extract from start of high half index.
2026  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
2027  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
2028  if (ExtractIndex != Extract.getValueType().getVectorNumElements())
2029    return false;
2030
2031  auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2032  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
2033  return true;
2034}
2035
2036static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2037                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2038                                unsigned NumberOfIgnoredLowBits = 0,
2039                                bool BiggerPattern = false) {
2040  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2041    return false;
2042
2043  switch (N->getOpcode()) {
2044  default:
2045    if (!N->isMachineOpcode())
2046      return false;
2047    break;
2048  case ISD::AND:
2049    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2050                                      NumberOfIgnoredLowBits, BiggerPattern);
2051  case ISD::SRL:
2052  case ISD::SRA:
2053    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2054
2055  case ISD::SIGN_EXTEND_INREG:
2056    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2057  }
2058
2059  unsigned NOpc = N->getMachineOpcode();
2060  switch (NOpc) {
2061  default:
2062    return false;
2063  case AArch64::SBFMWri:
2064  case AArch64::UBFMWri:
2065  case AArch64::SBFMXri:
2066  case AArch64::UBFMXri:
2067    Opc = NOpc;
2068    Opd0 = N->getOperand(0);
2069    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2070    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2071    return true;
2072  }
2073  // Unreachable
2074  return false;
2075}
2076
2077bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2078  unsigned Opc, Immr, Imms;
2079  SDValue Opd0;
2080  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2081    return false;
2082
2083  EVT VT = N->getValueType(0);
2084  SDLoc dl(N);
2085
2086  // If the bit extract operation is 64bit but the original type is 32bit, we
2087  // need to add one EXTRACT_SUBREG.
2088  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2089    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2090                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2091
2092    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2093    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
2094    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2095                                          MVT::i32, SDValue(BFM, 0), SubReg));
2096    return true;
2097  }
2098
2099  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2100                   CurDAG->getTargetConstant(Imms, dl, VT)};
2101  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2102  return true;
2103}
2104
2105/// Does DstMask form a complementary pair with the mask provided by
2106/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2107/// this asks whether DstMask zeroes precisely those bits that will be set by
2108/// the other half.
2109static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2110                              unsigned NumberOfIgnoredHighBits, EVT VT) {
2111  assert((VT == MVT::i32 || VT == MVT::i64) &&
2112         "i32 or i64 mask type expected!");
2113  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2114
2115  APInt SignificantDstMask = APInt(BitWidth, DstMask);
2116  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2117
2118  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2119         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
2120}
2121
2122// Look for bits that will be useful for later uses.
2123// A bit is consider useless as soon as it is dropped and never used
2124// before it as been dropped.
2125// E.g., looking for useful bit of x
2126// 1. y = x & 0x7
2127// 2. z = y >> 2
2128// After #1, x useful bits are 0x7, then the useful bits of x, live through
2129// y.
2130// After #2, the useful bits of x are 0x4.
2131// However, if x is used on an unpredicatable instruction, then all its bits
2132// are useful.
2133// E.g.
2134// 1. y = x & 0x7
2135// 2. z = y >> 2
2136// 3. str x, [@x]
2137static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2138
2139static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2140                                              unsigned Depth) {
2141  uint64_t Imm =
2142      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2143  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2144  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2145  getUsefulBits(Op, UsefulBits, Depth + 1);
2146}
2147
2148static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2149                                             uint64_t Imm, uint64_t MSB,
2150                                             unsigned Depth) {
2151  // inherit the bitwidth value
2152  APInt OpUsefulBits(UsefulBits);
2153  OpUsefulBits = 1;
2154
2155  if (MSB >= Imm) {
2156    OpUsefulBits <<= MSB - Imm + 1;
2157    --OpUsefulBits;
2158    // The interesting part will be in the lower part of the result
2159    getUsefulBits(Op, OpUsefulBits, Depth + 1);
2160    // The interesting part was starting at Imm in the argument
2161    OpUsefulBits <<= Imm;
2162  } else {
2163    OpUsefulBits <<= MSB + 1;
2164    --OpUsefulBits;
2165    // The interesting part will be shifted in the result
2166    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2167    getUsefulBits(Op, OpUsefulBits, Depth + 1);
2168    // The interesting part was at zero in the argument
2169    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2170  }
2171
2172  UsefulBits &= OpUsefulBits;
2173}
2174
2175static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2176                                  unsigned Depth) {
2177  uint64_t Imm =
2178      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2179  uint64_t MSB =
2180      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2181
2182  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2183}
2184
2185static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2186                                              unsigned Depth) {
2187  uint64_t ShiftTypeAndValue =
2188      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2189  APInt Mask(UsefulBits);
2190  Mask.clearAllBits();
2191  Mask.flipAllBits();
2192
2193  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2194    // Shift Left
2195    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2196    Mask <<= ShiftAmt;
2197    getUsefulBits(Op, Mask, Depth + 1);
2198    Mask.lshrInPlace(ShiftAmt);
2199  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2200    // Shift Right
2201    // We do not handle AArch64_AM::ASR, because the sign will change the
2202    // number of useful bits
2203    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2204    Mask.lshrInPlace(ShiftAmt);
2205    getUsefulBits(Op, Mask, Depth + 1);
2206    Mask <<= ShiftAmt;
2207  } else
2208    return;
2209
2210  UsefulBits &= Mask;
2211}
2212
2213static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2214                                 unsigned Depth) {
2215  uint64_t Imm =
2216      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2217  uint64_t MSB =
2218      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2219
2220  APInt OpUsefulBits(UsefulBits);
2221  OpUsefulBits = 1;
2222
2223  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2224  ResultUsefulBits.flipAllBits();
2225  APInt Mask(UsefulBits.getBitWidth(), 0);
2226
2227  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2228
2229  if (MSB >= Imm) {
2230    // The instruction is a BFXIL.
2231    uint64_t Width = MSB - Imm + 1;
2232    uint64_t LSB = Imm;
2233
2234    OpUsefulBits <<= Width;
2235    --OpUsefulBits;
2236
2237    if (Op.getOperand(1) == Orig) {
2238      // Copy the low bits from the result to bits starting from LSB.
2239      Mask = ResultUsefulBits & OpUsefulBits;
2240      Mask <<= LSB;
2241    }
2242
2243    if (Op.getOperand(0) == Orig)
2244      // Bits starting from LSB in the input contribute to the result.
2245      Mask |= (ResultUsefulBits & ~OpUsefulBits);
2246  } else {
2247    // The instruction is a BFI.
2248    uint64_t Width = MSB + 1;
2249    uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2250
2251    OpUsefulBits <<= Width;
2252    --OpUsefulBits;
2253    OpUsefulBits <<= LSB;
2254
2255    if (Op.getOperand(1) == Orig) {
2256      // Copy the bits from the result to the zero bits.
2257      Mask = ResultUsefulBits & OpUsefulBits;
2258      Mask.lshrInPlace(LSB);
2259    }
2260
2261    if (Op.getOperand(0) == Orig)
2262      Mask |= (ResultUsefulBits & ~OpUsefulBits);
2263  }
2264
2265  UsefulBits &= Mask;
2266}
2267
2268static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2269                                SDValue Orig, unsigned Depth) {
2270
2271  // Users of this node should have already been instruction selected
2272  // FIXME: Can we turn that into an assert?
2273  if (!UserNode->isMachineOpcode())
2274    return;
2275
2276  switch (UserNode->getMachineOpcode()) {
2277  default:
2278    return;
2279  case AArch64::ANDSWri:
2280  case AArch64::ANDSXri:
2281  case AArch64::ANDWri:
2282  case AArch64::ANDXri:
2283    // We increment Depth only when we call the getUsefulBits
2284    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2285                                             Depth);
2286  case AArch64::UBFMWri:
2287  case AArch64::UBFMXri:
2288    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2289
2290  case AArch64::ORRWrs:
2291  case AArch64::ORRXrs:
2292    if (UserNode->getOperand(1) != Orig)
2293      return;
2294    return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2295                                             Depth);
2296  case AArch64::BFMWri:
2297  case AArch64::BFMXri:
2298    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2299
2300  case AArch64::STRBBui:
2301  case AArch64::STURBBi:
2302    if (UserNode->getOperand(0) != Orig)
2303      return;
2304    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2305    return;
2306
2307  case AArch64::STRHHui:
2308  case AArch64::STURHHi:
2309    if (UserNode->getOperand(0) != Orig)
2310      return;
2311    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2312    return;
2313  }
2314}
2315
2316static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2317  if (Depth >= SelectionDAG::MaxRecursionDepth)
2318    return;
2319  // Initialize UsefulBits
2320  if (!Depth) {
2321    unsigned Bitwidth = Op.getScalarValueSizeInBits();
2322    // At the beginning, assume every produced bits is useful
2323    UsefulBits = APInt(Bitwidth, 0);
2324    UsefulBits.flipAllBits();
2325  }
2326  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2327
2328  for (SDNode *Node : Op.getNode()->uses()) {
2329    // A use cannot produce useful bits
2330    APInt UsefulBitsForUse = APInt(UsefulBits);
2331    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2332    UsersUsefulBits |= UsefulBitsForUse;
2333  }
2334  // UsefulBits contains the produced bits that are meaningful for the
2335  // current definition, thus a user cannot make a bit meaningful at
2336  // this point
2337  UsefulBits &= UsersUsefulBits;
2338}
2339
2340/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2341/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2342/// 0, return Op unchanged.
2343static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2344  if (ShlAmount == 0)
2345    return Op;
2346
2347  EVT VT = Op.getValueType();
2348  SDLoc dl(Op);
2349  unsigned BitWidth = VT.getSizeInBits();
2350  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2351
2352  SDNode *ShiftNode;
2353  if (ShlAmount > 0) {
2354    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2355    ShiftNode = CurDAG->getMachineNode(
2356        UBFMOpc, dl, VT, Op,
2357        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2358        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2359  } else {
2360    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2361    assert(ShlAmount < 0 && "expected right shift");
2362    int ShrAmount = -ShlAmount;
2363    ShiftNode = CurDAG->getMachineNode(
2364        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2365        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2366  }
2367
2368  return SDValue(ShiftNode, 0);
2369}
2370
2371/// Does this tree qualify as an attempt to move a bitfield into position,
2372/// essentially "(and (shl VAL, N), Mask)".
2373static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2374                                    bool BiggerPattern,
2375                                    SDValue &Src, int &ShiftAmount,
2376                                    int &MaskWidth) {
2377  EVT VT = Op.getValueType();
2378  unsigned BitWidth = VT.getSizeInBits();
2379  (void)BitWidth;
2380  assert(BitWidth == 32 || BitWidth == 64);
2381
2382  KnownBits Known = CurDAG->computeKnownBits(Op);
2383
2384  // Non-zero in the sense that they're not provably zero, which is the key
2385  // point if we want to use this value
2386  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2387
2388  // Discard a constant AND mask if present. It's safe because the node will
2389  // already have been factored into the computeKnownBits calculation above.
2390  uint64_t AndImm;
2391  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2392    assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2393    Op = Op.getOperand(0);
2394  }
2395
2396  // Don't match if the SHL has more than one use, since then we'll end up
2397  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2398  if (!BiggerPattern && !Op.hasOneUse())
2399    return false;
2400
2401  uint64_t ShlImm;
2402  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2403    return false;
2404  Op = Op.getOperand(0);
2405
2406  if (!isShiftedMask_64(NonZeroBits))
2407    return false;
2408
2409  ShiftAmount = countTrailingZeros(NonZeroBits);
2410  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2411
2412  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2413  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2414  // amount.  BiggerPattern is true when this pattern is being matched for BFI,
2415  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2416  // which case it is not profitable to insert an extra shift.
2417  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2418    return false;
2419  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2420
2421  return true;
2422}
2423
2424static bool isShiftedMask(uint64_t Mask, EVT VT) {
2425  assert(VT == MVT::i32 || VT == MVT::i64);
2426  if (VT == MVT::i32)
2427    return isShiftedMask_32(Mask);
2428  return isShiftedMask_64(Mask);
2429}
2430
2431// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2432// inserted only sets known zero bits.
2433static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2434  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2435
2436  EVT VT = N->getValueType(0);
2437  if (VT != MVT::i32 && VT != MVT::i64)
2438    return false;
2439
2440  unsigned BitWidth = VT.getSizeInBits();
2441
2442  uint64_t OrImm;
2443  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2444    return false;
2445
2446  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2447  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2448  // performance neutral.
2449  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2450    return false;
2451
2452  uint64_t MaskImm;
2453  SDValue And = N->getOperand(0);
2454  // Must be a single use AND with an immediate operand.
2455  if (!And.hasOneUse() ||
2456      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2457    return false;
2458
2459  // Compute the Known Zero for the AND as this allows us to catch more general
2460  // cases than just looking for AND with imm.
2461  KnownBits Known = CurDAG->computeKnownBits(And);
2462
2463  // Non-zero in the sense that they're not provably zero, which is the key
2464  // point if we want to use this value.
2465  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2466
2467  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2468  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2469    return false;
2470
2471  // The bits being inserted must only set those bits that are known to be zero.
2472  if ((OrImm & NotKnownZero) != 0) {
2473    // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2474    // currently handle this case.
2475    return false;
2476  }
2477
2478  // BFI/BFXIL dst, src, #lsb, #width.
2479  int LSB = countTrailingOnes(NotKnownZero);
2480  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2481
2482  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2483  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2484  unsigned ImmS = Width - 1;
2485
2486  // If we're creating a BFI instruction avoid cases where we need more
2487  // instructions to materialize the BFI constant as compared to the original
2488  // ORR.  A BFXIL will use the same constant as the original ORR, so the code
2489  // should be no worse in this case.
2490  bool IsBFI = LSB != 0;
2491  uint64_t BFIImm = OrImm >> LSB;
2492  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2493    // We have a BFI instruction and we know the constant can't be materialized
2494    // with a ORR-immediate with the zero register.
2495    unsigned OrChunks = 0, BFIChunks = 0;
2496    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2497      if (((OrImm >> Shift) & 0xFFFF) != 0)
2498        ++OrChunks;
2499      if (((BFIImm >> Shift) & 0xFFFF) != 0)
2500        ++BFIChunks;
2501    }
2502    if (BFIChunks > OrChunks)
2503      return false;
2504  }
2505
2506  // Materialize the constant to be inserted.
2507  SDLoc DL(N);
2508  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2509  SDNode *MOVI = CurDAG->getMachineNode(
2510      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2511
2512  // Create the BFI/BFXIL instruction.
2513  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2514                   CurDAG->getTargetConstant(ImmR, DL, VT),
2515                   CurDAG->getTargetConstant(ImmS, DL, VT)};
2516  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2517  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2518  return true;
2519}
2520
2521static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2522                                      SelectionDAG *CurDAG) {
2523  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2524
2525  EVT VT = N->getValueType(0);
2526  if (VT != MVT::i32 && VT != MVT::i64)
2527    return false;
2528
2529  unsigned BitWidth = VT.getSizeInBits();
2530
2531  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2532  // have the expected shape. Try to undo that.
2533
2534  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2535  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2536
2537  // Given a OR operation, check if we have the following pattern
2538  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2539  //                       isBitfieldExtractOp)
2540  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
2541  //                 countTrailingZeros(mask2) == imm2 - imm + 1
2542  // f = d | c
2543  // if yes, replace the OR instruction with:
2544  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2545
2546  // OR is commutative, check all combinations of operand order and values of
2547  // BiggerPattern, i.e.
2548  //     Opd0, Opd1, BiggerPattern=false
2549  //     Opd1, Opd0, BiggerPattern=false
2550  //     Opd0, Opd1, BiggerPattern=true
2551  //     Opd1, Opd0, BiggerPattern=true
2552  // Several of these combinations may match, so check with BiggerPattern=false
2553  // first since that will produce better results by matching more instructions
2554  // and/or inserting fewer extra instructions.
2555  for (int I = 0; I < 4; ++I) {
2556
2557    SDValue Dst, Src;
2558    unsigned ImmR, ImmS;
2559    bool BiggerPattern = I / 2;
2560    SDValue OrOpd0Val = N->getOperand(I % 2);
2561    SDNode *OrOpd0 = OrOpd0Val.getNode();
2562    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2563    SDNode *OrOpd1 = OrOpd1Val.getNode();
2564
2565    unsigned BFXOpc;
2566    int DstLSB, Width;
2567    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2568                            NumberOfIgnoredLowBits, BiggerPattern)) {
2569      // Check that the returned opcode is compatible with the pattern,
2570      // i.e., same type and zero extended (U and not S)
2571      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2572          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2573        continue;
2574
2575      // Compute the width of the bitfield insertion
2576      DstLSB = 0;
2577      Width = ImmS - ImmR + 1;
2578      // FIXME: This constraint is to catch bitfield insertion we may
2579      // want to widen the pattern if we want to grab general bitfied
2580      // move case
2581      if (Width <= 0)
2582        continue;
2583
2584      // If the mask on the insertee is correct, we have a BFXIL operation. We
2585      // can share the ImmR and ImmS values from the already-computed UBFM.
2586    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2587                                       BiggerPattern,
2588                                       Src, DstLSB, Width)) {
2589      ImmR = (BitWidth - DstLSB) % BitWidth;
2590      ImmS = Width - 1;
2591    } else
2592      continue;
2593
2594    // Check the second part of the pattern
2595    EVT VT = OrOpd1Val.getValueType();
2596    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2597
2598    // Compute the Known Zero for the candidate of the first operand.
2599    // This allows to catch more general case than just looking for
2600    // AND with imm. Indeed, simplify-demanded-bits may have removed
2601    // the AND instruction because it proves it was useless.
2602    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2603
2604    // Check if there is enough room for the second operand to appear
2605    // in the first one
2606    APInt BitsToBeInserted =
2607        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2608
2609    if ((BitsToBeInserted & ~Known.Zero) != 0)
2610      continue;
2611
2612    // Set the first operand
2613    uint64_t Imm;
2614    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2615        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2616      // In that case, we can eliminate the AND
2617      Dst = OrOpd1->getOperand(0);
2618    else
2619      // Maybe the AND has been removed by simplify-demanded-bits
2620      // or is useful because it discards more bits
2621      Dst = OrOpd1Val;
2622
2623    // both parts match
2624    SDLoc DL(N);
2625    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2626                     CurDAG->getTargetConstant(ImmS, DL, VT)};
2627    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2628    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2629    return true;
2630  }
2631
2632  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2633  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2634  // mask (e.g., 0x000ffff0).
2635  uint64_t Mask0Imm, Mask1Imm;
2636  SDValue And0 = N->getOperand(0);
2637  SDValue And1 = N->getOperand(1);
2638  if (And0.hasOneUse() && And1.hasOneUse() &&
2639      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2640      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2641      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2642      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2643
2644    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2645    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2646    // bits to be inserted.
2647    if (isShiftedMask(Mask0Imm, VT)) {
2648      std::swap(And0, And1);
2649      std::swap(Mask0Imm, Mask1Imm);
2650    }
2651
2652    SDValue Src = And1->getOperand(0);
2653    SDValue Dst = And0->getOperand(0);
2654    unsigned LSB = countTrailingZeros(Mask1Imm);
2655    int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2656
2657    // The BFXIL inserts the low-order bits from a source register, so right
2658    // shift the needed bits into place.
2659    SDLoc DL(N);
2660    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2661    SDNode *LSR = CurDAG->getMachineNode(
2662        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2663        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2664
2665    // BFXIL is an alias of BFM, so translate to BFM operands.
2666    unsigned ImmR = (BitWidth - LSB) % BitWidth;
2667    unsigned ImmS = Width - 1;
2668
2669    // Create the BFXIL instruction.
2670    SDValue Ops[] = {Dst, SDValue(LSR, 0),
2671                     CurDAG->getTargetConstant(ImmR, DL, VT),
2672                     CurDAG->getTargetConstant(ImmS, DL, VT)};
2673    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2674    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2675    return true;
2676  }
2677
2678  return false;
2679}
2680
2681bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2682  if (N->getOpcode() != ISD::OR)
2683    return false;
2684
2685  APInt NUsefulBits;
2686  getUsefulBits(SDValue(N, 0), NUsefulBits);
2687
2688  // If all bits are not useful, just return UNDEF.
2689  if (!NUsefulBits) {
2690    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2691    return true;
2692  }
2693
2694  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2695    return true;
2696
2697  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2698}
2699
2700/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2701/// equivalent of a left shift by a constant amount followed by an and masking
2702/// out a contiguous set of bits.
2703bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2704  if (N->getOpcode() != ISD::AND)
2705    return false;
2706
2707  EVT VT = N->getValueType(0);
2708  if (VT != MVT::i32 && VT != MVT::i64)
2709    return false;
2710
2711  SDValue Op0;
2712  int DstLSB, Width;
2713  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2714                               Op0, DstLSB, Width))
2715    return false;
2716
2717  // ImmR is the rotate right amount.
2718  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2719  // ImmS is the most significant bit of the source to be moved.
2720  unsigned ImmS = Width - 1;
2721
2722  SDLoc DL(N);
2723  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2724                   CurDAG->getTargetConstant(ImmS, DL, VT)};
2725  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2726  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2727  return true;
2728}
2729
2730/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2731/// variable shift/rotate instructions.
2732bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2733  EVT VT = N->getValueType(0);
2734
2735  unsigned Opc;
2736  switch (N->getOpcode()) {
2737  case ISD::ROTR:
2738    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2739    break;
2740  case ISD::SHL:
2741    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2742    break;
2743  case ISD::SRL:
2744    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2745    break;
2746  case ISD::SRA:
2747    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2748    break;
2749  default:
2750    return false;
2751  }
2752
2753  uint64_t Size;
2754  uint64_t Bits;
2755  if (VT == MVT::i32) {
2756    Bits = 5;
2757    Size = 32;
2758  } else if (VT == MVT::i64) {
2759    Bits = 6;
2760    Size = 64;
2761  } else
2762    return false;
2763
2764  SDValue ShiftAmt = N->getOperand(1);
2765  SDLoc DL(N);
2766  SDValue NewShiftAmt;
2767
2768  // Skip over an extend of the shift amount.
2769  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2770      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2771    ShiftAmt = ShiftAmt->getOperand(0);
2772
2773  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2774    SDValue Add0 = ShiftAmt->getOperand(0);
2775    SDValue Add1 = ShiftAmt->getOperand(1);
2776    uint64_t Add0Imm;
2777    uint64_t Add1Imm;
2778    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2779    // to avoid the ADD/SUB.
2780    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2781      NewShiftAmt = Add0;
2782    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2783    // generate a NEG instead of a SUB of a constant.
2784    else if (ShiftAmt->getOpcode() == ISD::SUB &&
2785             isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2786             (Add0Imm % Size == 0)) {
2787      unsigned NegOpc;
2788      unsigned ZeroReg;
2789      EVT SubVT = ShiftAmt->getValueType(0);
2790      if (SubVT == MVT::i32) {
2791        NegOpc = AArch64::SUBWrr;
2792        ZeroReg = AArch64::WZR;
2793      } else {
2794        assert(SubVT == MVT::i64);
2795        NegOpc = AArch64::SUBXrr;
2796        ZeroReg = AArch64::XZR;
2797      }
2798      SDValue Zero =
2799          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2800      MachineSDNode *Neg =
2801          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2802      NewShiftAmt = SDValue(Neg, 0);
2803    } else
2804      return false;
2805  } else {
2806    // If the shift amount is masked with an AND, check that the mask covers the
2807    // bits that are implicitly ANDed off by the above opcodes and if so, skip
2808    // the AND.
2809    uint64_t MaskImm;
2810    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
2811        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
2812      return false;
2813
2814    if (countTrailingOnes(MaskImm) < Bits)
2815      return false;
2816
2817    NewShiftAmt = ShiftAmt->getOperand(0);
2818  }
2819
2820  // Narrow/widen the shift amount to match the size of the shift operation.
2821  if (VT == MVT::i32)
2822    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2823  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2824    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2825    MachineSDNode *Ext = CurDAG->getMachineNode(
2826        AArch64::SUBREG_TO_REG, DL, VT,
2827        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2828    NewShiftAmt = SDValue(Ext, 0);
2829  }
2830
2831  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2832  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2833  return true;
2834}
2835
2836bool
2837AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2838                                              unsigned RegWidth) {
2839  APFloat FVal(0.0);
2840  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2841    FVal = CN->getValueAPF();
2842  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2843    // Some otherwise illegal constants are allowed in this case.
2844    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2845        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2846      return false;
2847
2848    ConstantPoolSDNode *CN =
2849        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2850    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2851  } else
2852    return false;
2853
2854  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2855  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2856  // x-register.
2857  //
2858  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2859  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2860  // integers.
2861  bool IsExact;
2862
2863  // fbits is between 1 and 64 in the worst-case, which means the fmul
2864  // could have 2^64 as an actual operand. Need 65 bits of precision.
2865  APSInt IntVal(65, true);
2866  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2867
2868  // N.b. isPowerOf2 also checks for > 0.
2869  if (!IsExact || !IntVal.isPowerOf2()) return false;
2870  unsigned FBits = IntVal.logBase2();
2871
2872  // Checks above should have guaranteed that we haven't lost information in
2873  // finding FBits, but it must still be in range.
2874  if (FBits == 0 || FBits > RegWidth) return false;
2875
2876  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2877  return true;
2878}
2879
2880// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
2881// of the string and obtains the integer values from them and combines these
2882// into a single value to be used in the MRS/MSR instruction.
2883static int getIntOperandFromRegisterString(StringRef RegString) {
2884  SmallVector<StringRef, 5> Fields;
2885  RegString.split(Fields, ':');
2886
2887  if (Fields.size() == 1)
2888    return -1;
2889
2890  assert(Fields.size() == 5
2891            && "Invalid number of fields in read register string");
2892
2893  SmallVector<int, 5> Ops;
2894  bool AllIntFields = true;
2895
2896  for (StringRef Field : Fields) {
2897    unsigned IntField;
2898    AllIntFields &= !Field.getAsInteger(10, IntField);
2899    Ops.push_back(IntField);
2900  }
2901
2902  assert(AllIntFields &&
2903          "Unexpected non-integer value in special register string.");
2904
2905  // Need to combine the integer fields of the string into a single value
2906  // based on the bit encoding of MRS/MSR instruction.
2907  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2908         (Ops[3] << 3) | (Ops[4]);
2909}
2910
2911// Lower the read_register intrinsic to an MRS instruction node if the special
2912// register string argument is either of the form detailed in the ALCE (the
2913// form described in getIntOperandsFromRegsterString) or is a named register
2914// known by the MRS SysReg mapper.
2915bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2916  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2917  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2918  SDLoc DL(N);
2919
2920  int Reg = getIntOperandFromRegisterString(RegString->getString());
2921  if (Reg != -1) {
2922    ReplaceNode(N, CurDAG->getMachineNode(
2923                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2924                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2925                       N->getOperand(0)));
2926    return true;
2927  }
2928
2929  // Use the sysreg mapper to map the remaining possible strings to the
2930  // value for the register to be used for the instruction operand.
2931  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2932  if (TheReg && TheReg->Readable &&
2933      TheReg->haveFeatures(Subtarget->getFeatureBits()))
2934    Reg = TheReg->Encoding;
2935  else
2936    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2937
2938  if (Reg != -1) {
2939    ReplaceNode(N, CurDAG->getMachineNode(
2940                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2941                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2942                       N->getOperand(0)));
2943    return true;
2944  }
2945
2946  if (RegString->getString() == "pc") {
2947    ReplaceNode(N, CurDAG->getMachineNode(
2948                       AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2949                       CurDAG->getTargetConstant(0, DL, MVT::i32),
2950                       N->getOperand(0)));
2951    return true;
2952  }
2953
2954  return false;
2955}
2956
2957// Lower the write_register intrinsic to an MSR instruction node if the special
2958// register string argument is either of the form detailed in the ALCE (the
2959// form described in getIntOperandsFromRegsterString) or is a named register
2960// known by the MSR SysReg mapper.
2961bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2962  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2963  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2964  SDLoc DL(N);
2965
2966  int Reg = getIntOperandFromRegisterString(RegString->getString());
2967  if (Reg != -1) {
2968    ReplaceNode(
2969        N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2970                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2971                                  N->getOperand(2), N->getOperand(0)));
2972    return true;
2973  }
2974
2975  // Check if the register was one of those allowed as the pstatefield value in
2976  // the MSR (immediate) instruction. To accept the values allowed in the
2977  // pstatefield for the MSR (immediate) instruction, we also require that an
2978  // immediate value has been provided as an argument, we know that this is
2979  // the case as it has been ensured by semantic checking.
2980  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2981  if (PMapper) {
2982    assert (isa<ConstantSDNode>(N->getOperand(2))
2983              && "Expected a constant integer expression.");
2984    unsigned Reg = PMapper->Encoding;
2985    uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2986    unsigned State;
2987    if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2988      assert(Immed < 2 && "Bad imm");
2989      State = AArch64::MSRpstateImm1;
2990    } else {
2991      assert(Immed < 16 && "Bad imm");
2992      State = AArch64::MSRpstateImm4;
2993    }
2994    ReplaceNode(N, CurDAG->getMachineNode(
2995                       State, DL, MVT::Other,
2996                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2997                       CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2998                       N->getOperand(0)));
2999    return true;
3000  }
3001
3002  // Use the sysreg mapper to attempt to map the remaining possible strings
3003  // to the value for the register to be used for the MSR (register)
3004  // instruction operand.
3005  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3006  if (TheReg && TheReg->Writeable &&
3007      TheReg->haveFeatures(Subtarget->getFeatureBits()))
3008    Reg = TheReg->Encoding;
3009  else
3010    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
3011  if (Reg != -1) {
3012    ReplaceNode(N, CurDAG->getMachineNode(
3013                       AArch64::MSR, DL, MVT::Other,
3014                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3015                       N->getOperand(2), N->getOperand(0)));
3016    return true;
3017  }
3018
3019  return false;
3020}
3021
3022/// We've got special pseudo-instructions for these
3023bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3024  unsigned Opcode;
3025  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3026
3027  // Leave IR for LSE if subtarget supports it.
3028  if (Subtarget->hasLSE()) return false;
3029
3030  if (MemTy == MVT::i8)
3031    Opcode = AArch64::CMP_SWAP_8;
3032  else if (MemTy == MVT::i16)
3033    Opcode = AArch64::CMP_SWAP_16;
3034  else if (MemTy == MVT::i32)
3035    Opcode = AArch64::CMP_SWAP_32;
3036  else if (MemTy == MVT::i64)
3037    Opcode = AArch64::CMP_SWAP_64;
3038  else
3039    llvm_unreachable("Unknown AtomicCmpSwap type");
3040
3041  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3042  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3043                   N->getOperand(0)};
3044  SDNode *CmpSwap = CurDAG->getMachineNode(
3045      Opcode, SDLoc(N),
3046      CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3047
3048  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3049  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3050
3051  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3052  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3053  CurDAG->RemoveDeadNode(N);
3054
3055  return true;
3056}
3057
3058bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
3059                                                  SDValue &Offset) {
3060  auto C = dyn_cast<ConstantSDNode>(N);
3061  if (!C)
3062    return false;
3063
3064  auto Ty = N->getValueType(0);
3065
3066  int64_t Imm = C->getSExtValue();
3067  SDLoc DL(N);
3068
3069  if ((Imm >= -128) && (Imm <= 127)) {
3070    Base = CurDAG->getTargetConstant(Imm, DL, Ty);
3071    Offset = CurDAG->getTargetConstant(0, DL, Ty);
3072    return true;
3073  }
3074
3075  if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
3076    Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
3077    Offset = CurDAG->getTargetConstant(8, DL, Ty);
3078    return true;
3079  }
3080
3081  return false;
3082}
3083
3084bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
3085  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3086    const int64_t ImmVal = CNode->getZExtValue();
3087    SDLoc DL(N);
3088
3089    switch (VT.SimpleTy) {
3090    case MVT::i8:
3091      if ((ImmVal & 0xFF) == ImmVal) {
3092        Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3093        Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3094        return true;
3095      }
3096      break;
3097    case MVT::i16:
3098    case MVT::i32:
3099    case MVT::i64:
3100      if ((ImmVal & 0xFF) == ImmVal) {
3101        Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3102        Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3103        return true;
3104      } else if ((ImmVal & 0xFF00) == ImmVal) {
3105        Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3106        Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32);
3107        return true;
3108      }
3109      break;
3110    default:
3111      break;
3112    }
3113  }
3114
3115  return false;
3116}
3117
3118bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
3119  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3120    int64_t ImmVal = CNode->getSExtValue();
3121    SDLoc DL(N);
3122    if (ImmVal >= -128 && ImmVal < 128) {
3123      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3124      return true;
3125    }
3126  }
3127  return false;
3128}
3129
3130bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) {
3131  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3132    uint64_t ImmVal = CNode->getSExtValue();
3133    SDLoc DL(N);
3134    ImmVal = ImmVal & 0xFF;
3135    if (ImmVal < 256) {
3136      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3137      return true;
3138    }
3139  }
3140  return false;
3141}
3142
3143bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
3144  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3145    uint64_t ImmVal = CNode->getZExtValue();
3146    SDLoc DL(N);
3147
3148    // Shift mask depending on type size.
3149    switch (VT.SimpleTy) {
3150      case MVT::i8:
3151        ImmVal &= 0xFF;
3152        ImmVal |= ImmVal << 8;
3153        ImmVal |= ImmVal << 16;
3154        ImmVal |= ImmVal << 32;
3155        break;
3156      case MVT::i16:
3157        ImmVal &= 0xFFFF;
3158        ImmVal |= ImmVal << 16;
3159        ImmVal |= ImmVal << 32;
3160        break;
3161      case MVT::i32:
3162        ImmVal &= 0xFFFFFFFF;
3163        ImmVal |= ImmVal << 32;
3164        break;
3165      case MVT::i64:
3166        break;
3167      default:
3168        llvm_unreachable("Unexpected type");
3169    }
3170
3171    uint64_t encoding;
3172    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
3173      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
3174      return true;
3175    }
3176  }
3177  return false;
3178}
3179
3180// This method is only needed to "cast" i64s into i32s when the value
3181// is a valid shift which has been splatted into a vector with i64 elements.
3182// Every other type is fine in tablegen.
3183bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low,
3184                                              uint64_t High, SDValue &Imm) {
3185  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3186    uint64_t ImmVal = CN->getZExtValue();
3187    SDLoc DL(N);
3188
3189    if (ImmVal >= Low && ImmVal <= High) {
3190      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3191      return true;
3192    }
3193  }
3194
3195  return false;
3196}
3197
3198bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
3199  // tagp(FrameIndex, IRGstack, tag_offset):
3200  // since the offset between FrameIndex and IRGstack is a compile-time
3201  // constant, this can be lowered to a single ADDG instruction.
3202  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
3203    return false;
3204  }
3205
3206  SDValue IRG_SP = N->getOperand(2);
3207  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
3208      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
3209          Intrinsic::aarch64_irg_sp) {
3210    return false;
3211  }
3212
3213  const TargetLowering *TLI = getTargetLowering();
3214  SDLoc DL(N);
3215  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
3216  SDValue FiOp = CurDAG->getTargetFrameIndex(
3217      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3218  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3219
3220  SDNode *Out = CurDAG->getMachineNode(
3221      AArch64::TAGPstack, DL, MVT::i64,
3222      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
3223       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3224  ReplaceNode(N, Out);
3225  return true;
3226}
3227
3228void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
3229  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
3230         "llvm.aarch64.tagp third argument must be an immediate");
3231  if (trySelectStackSlotTagP(N))
3232    return;
3233  // FIXME: above applies in any case when offset between Op1 and Op2 is a
3234  // compile-time constant, not just for stack allocations.
3235
3236  // General case for unrelated pointers in Op1 and Op2.
3237  SDLoc DL(N);
3238  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3239  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
3240                                      {N->getOperand(1), N->getOperand(2)});
3241  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
3242                                      {SDValue(N1, 0), N->getOperand(2)});
3243  SDNode *N3 = CurDAG->getMachineNode(
3244      AArch64::ADDG, DL, MVT::i64,
3245      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
3246       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3247  ReplaceNode(N, N3);
3248}
3249
3250// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
3251// vector types larger than NEON don't have a matching SubRegIndex.
3252static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3253  assert(V.getValueType().isScalableVector() &&
3254         V.getValueType().getSizeInBits().getKnownMinSize() ==
3255             AArch64::SVEBitsPerBlock &&
3256         "Expected to extract from a packed scalable vector!");
3257  assert(VT.isFixedLengthVector() &&
3258         "Expected to extract a fixed length vector!");
3259
3260  SDLoc DL(V);
3261  switch (VT.getSizeInBits()) {
3262  case 64: {
3263    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3264    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3265  }
3266  case 128: {
3267    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3268    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3269  }
3270  default: {
3271    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3272    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3273  }
3274  }
3275}
3276
3277// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
3278// vector types larger than NEON don't have a matching SubRegIndex.
3279static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3280  assert(VT.isScalableVector() &&
3281         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
3282         "Expected to insert into a packed scalable vector!");
3283  assert(V.getValueType().isFixedLengthVector() &&
3284         "Expected to insert a fixed length vector!");
3285
3286  SDLoc DL(V);
3287  switch (V.getValueType().getSizeInBits()) {
3288  case 64: {
3289    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3290    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3291    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3292                               SDValue(Container, 0), V, SubReg);
3293  }
3294  case 128: {
3295    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3296    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3297    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3298                               SDValue(Container, 0), V, SubReg);
3299  }
3300  default: {
3301    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3302    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3303  }
3304  }
3305}
3306
3307void AArch64DAGToDAGISel::Select(SDNode *Node) {
3308  // If we have a custom node, we already have selected!
3309  if (Node->isMachineOpcode()) {
3310    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
3311    Node->setNodeId(-1);
3312    return;
3313  }
3314
3315  // Few custom selection stuff.
3316  EVT VT = Node->getValueType(0);
3317
3318  switch (Node->getOpcode()) {
3319  default:
3320    break;
3321
3322  case ISD::ATOMIC_CMP_SWAP:
3323    if (SelectCMP_SWAP(Node))
3324      return;
3325    break;
3326
3327  case ISD::READ_REGISTER:
3328    if (tryReadRegister(Node))
3329      return;
3330    break;
3331
3332  case ISD::WRITE_REGISTER:
3333    if (tryWriteRegister(Node))
3334      return;
3335    break;
3336
3337  case ISD::ADD:
3338    if (tryMLAV64LaneV128(Node))
3339      return;
3340    break;
3341
3342  case ISD::LOAD: {
3343    // Try to select as an indexed load. Fall through to normal processing
3344    // if we can't.
3345    if (tryIndexedLoad(Node))
3346      return;
3347    break;
3348  }
3349
3350  case ISD::SRL:
3351  case ISD::AND:
3352  case ISD::SRA:
3353  case ISD::SIGN_EXTEND_INREG:
3354    if (tryBitfieldExtractOp(Node))
3355      return;
3356    if (tryBitfieldInsertInZeroOp(Node))
3357      return;
3358    LLVM_FALLTHROUGH;
3359  case ISD::ROTR:
3360  case ISD::SHL:
3361    if (tryShiftAmountMod(Node))
3362      return;
3363    break;
3364
3365  case ISD::SIGN_EXTEND:
3366    if (tryBitfieldExtractOpFromSExt(Node))
3367      return;
3368    break;
3369
3370  case ISD::FP_EXTEND:
3371    if (tryHighFPExt(Node))
3372      return;
3373    break;
3374
3375  case ISD::OR:
3376    if (tryBitfieldInsertOp(Node))
3377      return;
3378    break;
3379
3380  case ISD::EXTRACT_SUBVECTOR: {
3381    // Bail when not a "cast" like extract_subvector.
3382    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
3383      break;
3384
3385    // Bail when normal isel can do the job.
3386    EVT InVT = Node->getOperand(0).getValueType();
3387    if (VT.isScalableVector() || InVT.isFixedLengthVector())
3388      break;
3389
3390    // NOTE: We can only get here when doing fixed length SVE code generation.
3391    // We do manual selection because the types involved are not linked to real
3392    // registers (despite being legal) and must be coerced into SVE registers.
3393    //
3394    // NOTE: If the above changes, be aware that selection will still not work
3395    // because the td definition of extract_vector does not support extracting
3396    // a fixed length vector from a scalable vector.
3397
3398    ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
3399    return;
3400  }
3401
3402  case ISD::INSERT_SUBVECTOR: {
3403    // Bail when not a "cast" like insert_subvector.
3404    if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
3405      break;
3406    if (!Node->getOperand(0).isUndef())
3407      break;
3408
3409    // Bail when normal isel should do the job.
3410    EVT InVT = Node->getOperand(1).getValueType();
3411    if (VT.isFixedLengthVector() || InVT.isScalableVector())
3412      break;
3413
3414    // NOTE: We can only get here when doing fixed length SVE code generation.
3415    // We do manual selection because the types involved are not linked to real
3416    // registers (despite being legal) and must be coerced into SVE registers.
3417    //
3418    // NOTE: If the above changes, be aware that selection will still not work
3419    // because the td definition of insert_vector does not support inserting a
3420    // fixed length vector into a scalable vector.
3421
3422    ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
3423    return;
3424  }
3425
3426  case ISD::Constant: {
3427    // Materialize zero constants as copies from WZR/XZR.  This allows
3428    // the coalescer to propagate these into other instructions.
3429    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
3430    if (ConstNode->isNullValue()) {
3431      if (VT == MVT::i32) {
3432        SDValue New = CurDAG->getCopyFromReg(
3433            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
3434        ReplaceNode(Node, New.getNode());
3435        return;
3436      } else if (VT == MVT::i64) {
3437        SDValue New = CurDAG->getCopyFromReg(
3438            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
3439        ReplaceNode(Node, New.getNode());
3440        return;
3441      }
3442    }
3443    break;
3444  }
3445
3446  case ISD::FrameIndex: {
3447    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
3448    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
3449    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
3450    const TargetLowering *TLI = getTargetLowering();
3451    SDValue TFI = CurDAG->getTargetFrameIndex(
3452        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3453    SDLoc DL(Node);
3454    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
3455                      CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
3456    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
3457    return;
3458  }
3459  case ISD::INTRINSIC_W_CHAIN: {
3460    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3461    switch (IntNo) {
3462    default:
3463      break;
3464    case Intrinsic::aarch64_ldaxp:
3465    case Intrinsic::aarch64_ldxp: {
3466      unsigned Op =
3467          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3468      SDValue MemAddr = Node->getOperand(2);
3469      SDLoc DL(Node);
3470      SDValue Chain = Node->getOperand(0);
3471
3472      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
3473                                          MVT::Other, MemAddr, Chain);
3474
3475      // Transfer memoperands.
3476      MachineMemOperand *MemOp =
3477          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3478      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3479      ReplaceNode(Node, Ld);
3480      return;
3481    }
3482    case Intrinsic::aarch64_stlxp:
3483    case Intrinsic::aarch64_stxp: {
3484      unsigned Op =
3485          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3486      SDLoc DL(Node);
3487      SDValue Chain = Node->getOperand(0);
3488      SDValue ValLo = Node->getOperand(2);
3489      SDValue ValHi = Node->getOperand(3);
3490      SDValue MemAddr = Node->getOperand(4);
3491
3492      // Place arguments in the right order.
3493      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3494
3495      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
3496      // Transfer memoperands.
3497      MachineMemOperand *MemOp =
3498          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3499      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3500
3501      ReplaceNode(Node, St);
3502      return;
3503    }
3504    case Intrinsic::aarch64_neon_ld1x2:
3505      if (VT == MVT::v8i8) {
3506        SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3507        return;
3508      } else if (VT == MVT::v16i8) {
3509        SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3510        return;
3511      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3512        SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3513        return;
3514      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3515        SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3516        return;
3517      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3518        SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3519        return;
3520      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3521        SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3522        return;
3523      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3524        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3525        return;
3526      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3527        SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3528        return;
3529      }
3530      break;
3531    case Intrinsic::aarch64_neon_ld1x3:
3532      if (VT == MVT::v8i8) {
3533        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3534        return;
3535      } else if (VT == MVT::v16i8) {
3536        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3537        return;
3538      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3539        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3540        return;
3541      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3542        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3543        return;
3544      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3545        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3546        return;
3547      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3548        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3549        return;
3550      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3551        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3552        return;
3553      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3554        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3555        return;
3556      }
3557      break;
3558    case Intrinsic::aarch64_neon_ld1x4:
3559      if (VT == MVT::v8i8) {
3560        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3561        return;
3562      } else if (VT == MVT::v16i8) {
3563        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3564        return;
3565      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3566        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3567        return;
3568      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3569        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3570        return;
3571      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3572        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3573        return;
3574      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3575        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3576        return;
3577      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3578        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3579        return;
3580      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3581        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3582        return;
3583      }
3584      break;
3585    case Intrinsic::aarch64_neon_ld2:
3586      if (VT == MVT::v8i8) {
3587        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3588        return;
3589      } else if (VT == MVT::v16i8) {
3590        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3591        return;
3592      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3593        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3594        return;
3595      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3596        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3597        return;
3598      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3599        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3600        return;
3601      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3602        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3603        return;
3604      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3605        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3606        return;
3607      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3608        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3609        return;
3610      }
3611      break;
3612    case Intrinsic::aarch64_neon_ld3:
3613      if (VT == MVT::v8i8) {
3614        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3615        return;
3616      } else if (VT == MVT::v16i8) {
3617        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3618        return;
3619      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3620        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3621        return;
3622      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3623        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3624        return;
3625      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3626        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3627        return;
3628      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3629        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3630        return;
3631      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3632        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3633        return;
3634      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3635        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3636        return;
3637      }
3638      break;
3639    case Intrinsic::aarch64_neon_ld4:
3640      if (VT == MVT::v8i8) {
3641        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3642        return;
3643      } else if (VT == MVT::v16i8) {
3644        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3645        return;
3646      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3647        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3648        return;
3649      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3650        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3651        return;
3652      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3653        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3654        return;
3655      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3656        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3657        return;
3658      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3659        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3660        return;
3661      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3662        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3663        return;
3664      }
3665      break;
3666    case Intrinsic::aarch64_neon_ld2r:
3667      if (VT == MVT::v8i8) {
3668        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3669        return;
3670      } else if (VT == MVT::v16i8) {
3671        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3672        return;
3673      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3674        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3675        return;
3676      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3677        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3678        return;
3679      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3680        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3681        return;
3682      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3683        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3684        return;
3685      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3686        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3687        return;
3688      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3689        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3690        return;
3691      }
3692      break;
3693    case Intrinsic::aarch64_neon_ld3r:
3694      if (VT == MVT::v8i8) {
3695        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3696        return;
3697      } else if (VT == MVT::v16i8) {
3698        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3699        return;
3700      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3701        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3702        return;
3703      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3704        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3705        return;
3706      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3707        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3708        return;
3709      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3710        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3711        return;
3712      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3713        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3714        return;
3715      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3716        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3717        return;
3718      }
3719      break;
3720    case Intrinsic::aarch64_neon_ld4r:
3721      if (VT == MVT::v8i8) {
3722        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3723        return;
3724      } else if (VT == MVT::v16i8) {
3725        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3726        return;
3727      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3728        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3729        return;
3730      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3731        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3732        return;
3733      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3734        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3735        return;
3736      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3737        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3738        return;
3739      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3740        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3741        return;
3742      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3743        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3744        return;
3745      }
3746      break;
3747    case Intrinsic::aarch64_neon_ld2lane:
3748      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3749        SelectLoadLane(Node, 2, AArch64::LD2i8);
3750        return;
3751      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3752                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3753        SelectLoadLane(Node, 2, AArch64::LD2i16);
3754        return;
3755      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3756                 VT == MVT::v2f32) {
3757        SelectLoadLane(Node, 2, AArch64::LD2i32);
3758        return;
3759      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3760                 VT == MVT::v1f64) {
3761        SelectLoadLane(Node, 2, AArch64::LD2i64);
3762        return;
3763      }
3764      break;
3765    case Intrinsic::aarch64_neon_ld3lane:
3766      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3767        SelectLoadLane(Node, 3, AArch64::LD3i8);
3768        return;
3769      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3770                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3771        SelectLoadLane(Node, 3, AArch64::LD3i16);
3772        return;
3773      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3774                 VT == MVT::v2f32) {
3775        SelectLoadLane(Node, 3, AArch64::LD3i32);
3776        return;
3777      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3778                 VT == MVT::v1f64) {
3779        SelectLoadLane(Node, 3, AArch64::LD3i64);
3780        return;
3781      }
3782      break;
3783    case Intrinsic::aarch64_neon_ld4lane:
3784      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3785        SelectLoadLane(Node, 4, AArch64::LD4i8);
3786        return;
3787      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3788                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3789        SelectLoadLane(Node, 4, AArch64::LD4i16);
3790        return;
3791      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3792                 VT == MVT::v2f32) {
3793        SelectLoadLane(Node, 4, AArch64::LD4i32);
3794        return;
3795      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3796                 VT == MVT::v1f64) {
3797        SelectLoadLane(Node, 4, AArch64::LD4i64);
3798        return;
3799      }
3800      break;
3801    }
3802  } break;
3803  case ISD::INTRINSIC_WO_CHAIN: {
3804    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3805    switch (IntNo) {
3806    default:
3807      break;
3808    case Intrinsic::aarch64_tagp:
3809      SelectTagP(Node);
3810      return;
3811    case Intrinsic::aarch64_neon_tbl2:
3812      SelectTable(Node, 2,
3813                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3814                  false);
3815      return;
3816    case Intrinsic::aarch64_neon_tbl3:
3817      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3818                                           : AArch64::TBLv16i8Three,
3819                  false);
3820      return;
3821    case Intrinsic::aarch64_neon_tbl4:
3822      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3823                                           : AArch64::TBLv16i8Four,
3824                  false);
3825      return;
3826    case Intrinsic::aarch64_neon_tbx2:
3827      SelectTable(Node, 2,
3828                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3829                  true);
3830      return;
3831    case Intrinsic::aarch64_neon_tbx3:
3832      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3833                                           : AArch64::TBXv16i8Three,
3834                  true);
3835      return;
3836    case Intrinsic::aarch64_neon_tbx4:
3837      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3838                                           : AArch64::TBXv16i8Four,
3839                  true);
3840      return;
3841    case Intrinsic::aarch64_neon_smull:
3842    case Intrinsic::aarch64_neon_umull:
3843      if (tryMULLV64LaneV128(IntNo, Node))
3844        return;
3845      break;
3846    }
3847    break;
3848  }
3849  case ISD::INTRINSIC_VOID: {
3850    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3851    if (Node->getNumOperands() >= 3)
3852      VT = Node->getOperand(2)->getValueType(0);
3853    switch (IntNo) {
3854    default:
3855      break;
3856    case Intrinsic::aarch64_neon_st1x2: {
3857      if (VT == MVT::v8i8) {
3858        SelectStore(Node, 2, AArch64::ST1Twov8b);
3859        return;
3860      } else if (VT == MVT::v16i8) {
3861        SelectStore(Node, 2, AArch64::ST1Twov16b);
3862        return;
3863      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3864                 VT == MVT::v4bf16) {
3865        SelectStore(Node, 2, AArch64::ST1Twov4h);
3866        return;
3867      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3868                 VT == MVT::v8bf16) {
3869        SelectStore(Node, 2, AArch64::ST1Twov8h);
3870        return;
3871      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3872        SelectStore(Node, 2, AArch64::ST1Twov2s);
3873        return;
3874      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3875        SelectStore(Node, 2, AArch64::ST1Twov4s);
3876        return;
3877      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3878        SelectStore(Node, 2, AArch64::ST1Twov2d);
3879        return;
3880      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3881        SelectStore(Node, 2, AArch64::ST1Twov1d);
3882        return;
3883      }
3884      break;
3885    }
3886    case Intrinsic::aarch64_neon_st1x3: {
3887      if (VT == MVT::v8i8) {
3888        SelectStore(Node, 3, AArch64::ST1Threev8b);
3889        return;
3890      } else if (VT == MVT::v16i8) {
3891        SelectStore(Node, 3, AArch64::ST1Threev16b);
3892        return;
3893      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3894                 VT == MVT::v4bf16) {
3895        SelectStore(Node, 3, AArch64::ST1Threev4h);
3896        return;
3897      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3898                 VT == MVT::v8bf16) {
3899        SelectStore(Node, 3, AArch64::ST1Threev8h);
3900        return;
3901      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3902        SelectStore(Node, 3, AArch64::ST1Threev2s);
3903        return;
3904      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3905        SelectStore(Node, 3, AArch64::ST1Threev4s);
3906        return;
3907      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3908        SelectStore(Node, 3, AArch64::ST1Threev2d);
3909        return;
3910      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3911        SelectStore(Node, 3, AArch64::ST1Threev1d);
3912        return;
3913      }
3914      break;
3915    }
3916    case Intrinsic::aarch64_neon_st1x4: {
3917      if (VT == MVT::v8i8) {
3918        SelectStore(Node, 4, AArch64::ST1Fourv8b);
3919        return;
3920      } else if (VT == MVT::v16i8) {
3921        SelectStore(Node, 4, AArch64::ST1Fourv16b);
3922        return;
3923      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3924                 VT == MVT::v4bf16) {
3925        SelectStore(Node, 4, AArch64::ST1Fourv4h);
3926        return;
3927      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3928                 VT == MVT::v8bf16) {
3929        SelectStore(Node, 4, AArch64::ST1Fourv8h);
3930        return;
3931      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3932        SelectStore(Node, 4, AArch64::ST1Fourv2s);
3933        return;
3934      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3935        SelectStore(Node, 4, AArch64::ST1Fourv4s);
3936        return;
3937      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3938        SelectStore(Node, 4, AArch64::ST1Fourv2d);
3939        return;
3940      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3941        SelectStore(Node, 4, AArch64::ST1Fourv1d);
3942        return;
3943      }
3944      break;
3945    }
3946    case Intrinsic::aarch64_neon_st2: {
3947      if (VT == MVT::v8i8) {
3948        SelectStore(Node, 2, AArch64::ST2Twov8b);
3949        return;
3950      } else if (VT == MVT::v16i8) {
3951        SelectStore(Node, 2, AArch64::ST2Twov16b);
3952        return;
3953      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3954                 VT == MVT::v4bf16) {
3955        SelectStore(Node, 2, AArch64::ST2Twov4h);
3956        return;
3957      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3958                 VT == MVT::v8bf16) {
3959        SelectStore(Node, 2, AArch64::ST2Twov8h);
3960        return;
3961      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3962        SelectStore(Node, 2, AArch64::ST2Twov2s);
3963        return;
3964      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3965        SelectStore(Node, 2, AArch64::ST2Twov4s);
3966        return;
3967      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3968        SelectStore(Node, 2, AArch64::ST2Twov2d);
3969        return;
3970      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3971        SelectStore(Node, 2, AArch64::ST1Twov1d);
3972        return;
3973      }
3974      break;
3975    }
3976    case Intrinsic::aarch64_neon_st3: {
3977      if (VT == MVT::v8i8) {
3978        SelectStore(Node, 3, AArch64::ST3Threev8b);
3979        return;
3980      } else if (VT == MVT::v16i8) {
3981        SelectStore(Node, 3, AArch64::ST3Threev16b);
3982        return;
3983      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3984                 VT == MVT::v4bf16) {
3985        SelectStore(Node, 3, AArch64::ST3Threev4h);
3986        return;
3987      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3988                 VT == MVT::v8bf16) {
3989        SelectStore(Node, 3, AArch64::ST3Threev8h);
3990        return;
3991      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3992        SelectStore(Node, 3, AArch64::ST3Threev2s);
3993        return;
3994      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3995        SelectStore(Node, 3, AArch64::ST3Threev4s);
3996        return;
3997      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3998        SelectStore(Node, 3, AArch64::ST3Threev2d);
3999        return;
4000      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4001        SelectStore(Node, 3, AArch64::ST1Threev1d);
4002        return;
4003      }
4004      break;
4005    }
4006    case Intrinsic::aarch64_neon_st4: {
4007      if (VT == MVT::v8i8) {
4008        SelectStore(Node, 4, AArch64::ST4Fourv8b);
4009        return;
4010      } else if (VT == MVT::v16i8) {
4011        SelectStore(Node, 4, AArch64::ST4Fourv16b);
4012        return;
4013      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4014                 VT == MVT::v4bf16) {
4015        SelectStore(Node, 4, AArch64::ST4Fourv4h);
4016        return;
4017      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4018                 VT == MVT::v8bf16) {
4019        SelectStore(Node, 4, AArch64::ST4Fourv8h);
4020        return;
4021      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4022        SelectStore(Node, 4, AArch64::ST4Fourv2s);
4023        return;
4024      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4025        SelectStore(Node, 4, AArch64::ST4Fourv4s);
4026        return;
4027      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4028        SelectStore(Node, 4, AArch64::ST4Fourv2d);
4029        return;
4030      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4031        SelectStore(Node, 4, AArch64::ST1Fourv1d);
4032        return;
4033      }
4034      break;
4035    }
4036    case Intrinsic::aarch64_neon_st2lane: {
4037      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4038        SelectStoreLane(Node, 2, AArch64::ST2i8);
4039        return;
4040      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4041                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4042        SelectStoreLane(Node, 2, AArch64::ST2i16);
4043        return;
4044      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4045                 VT == MVT::v2f32) {
4046        SelectStoreLane(Node, 2, AArch64::ST2i32);
4047        return;
4048      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4049                 VT == MVT::v1f64) {
4050        SelectStoreLane(Node, 2, AArch64::ST2i64);
4051        return;
4052      }
4053      break;
4054    }
4055    case Intrinsic::aarch64_neon_st3lane: {
4056      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4057        SelectStoreLane(Node, 3, AArch64::ST3i8);
4058        return;
4059      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4060                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4061        SelectStoreLane(Node, 3, AArch64::ST3i16);
4062        return;
4063      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4064                 VT == MVT::v2f32) {
4065        SelectStoreLane(Node, 3, AArch64::ST3i32);
4066        return;
4067      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4068                 VT == MVT::v1f64) {
4069        SelectStoreLane(Node, 3, AArch64::ST3i64);
4070        return;
4071      }
4072      break;
4073    }
4074    case Intrinsic::aarch64_neon_st4lane: {
4075      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4076        SelectStoreLane(Node, 4, AArch64::ST4i8);
4077        return;
4078      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4079                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4080        SelectStoreLane(Node, 4, AArch64::ST4i16);
4081        return;
4082      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4083                 VT == MVT::v2f32) {
4084        SelectStoreLane(Node, 4, AArch64::ST4i32);
4085        return;
4086      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4087                 VT == MVT::v1f64) {
4088        SelectStoreLane(Node, 4, AArch64::ST4i64);
4089        return;
4090      }
4091      break;
4092    }
4093    case Intrinsic::aarch64_sve_st2: {
4094      if (VT == MVT::nxv16i8) {
4095        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
4096        return;
4097      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4098                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4099        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
4100        return;
4101      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4102        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
4103        return;
4104      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4105        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
4106        return;
4107      }
4108      break;
4109    }
4110    case Intrinsic::aarch64_sve_st3: {
4111      if (VT == MVT::nxv16i8) {
4112        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
4113        return;
4114      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4115                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4116        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
4117        return;
4118      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4119        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
4120        return;
4121      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4122        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
4123        return;
4124      }
4125      break;
4126    }
4127    case Intrinsic::aarch64_sve_st4: {
4128      if (VT == MVT::nxv16i8) {
4129        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
4130        return;
4131      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4132                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4133        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
4134        return;
4135      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4136        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
4137        return;
4138      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4139        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
4140        return;
4141      }
4142      break;
4143    }
4144    }
4145    break;
4146  }
4147  case AArch64ISD::LD2post: {
4148    if (VT == MVT::v8i8) {
4149      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
4150      return;
4151    } else if (VT == MVT::v16i8) {
4152      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
4153      return;
4154    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4155      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
4156      return;
4157    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4158      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
4159      return;
4160    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4161      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
4162      return;
4163    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4164      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
4165      return;
4166    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4167      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4168      return;
4169    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4170      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
4171      return;
4172    }
4173    break;
4174  }
4175  case AArch64ISD::LD3post: {
4176    if (VT == MVT::v8i8) {
4177      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
4178      return;
4179    } else if (VT == MVT::v16i8) {
4180      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
4181      return;
4182    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4183      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
4184      return;
4185    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4186      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
4187      return;
4188    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4189      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
4190      return;
4191    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4192      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
4193      return;
4194    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4195      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4196      return;
4197    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4198      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
4199      return;
4200    }
4201    break;
4202  }
4203  case AArch64ISD::LD4post: {
4204    if (VT == MVT::v8i8) {
4205      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
4206      return;
4207    } else if (VT == MVT::v16i8) {
4208      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
4209      return;
4210    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4211      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
4212      return;
4213    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4214      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
4215      return;
4216    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4217      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
4218      return;
4219    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4220      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
4221      return;
4222    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4223      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4224      return;
4225    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4226      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
4227      return;
4228    }
4229    break;
4230  }
4231  case AArch64ISD::LD1x2post: {
4232    if (VT == MVT::v8i8) {
4233      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
4234      return;
4235    } else if (VT == MVT::v16i8) {
4236      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
4237      return;
4238    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4239      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
4240      return;
4241    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4242      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
4243      return;
4244    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4245      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
4246      return;
4247    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4248      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
4249      return;
4250    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4251      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4252      return;
4253    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4254      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
4255      return;
4256    }
4257    break;
4258  }
4259  case AArch64ISD::LD1x3post: {
4260    if (VT == MVT::v8i8) {
4261      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
4262      return;
4263    } else if (VT == MVT::v16i8) {
4264      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
4265      return;
4266    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4267      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
4268      return;
4269    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4270      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
4271      return;
4272    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4273      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
4274      return;
4275    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4276      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
4277      return;
4278    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4279      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4280      return;
4281    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4282      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
4283      return;
4284    }
4285    break;
4286  }
4287  case AArch64ISD::LD1x4post: {
4288    if (VT == MVT::v8i8) {
4289      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
4290      return;
4291    } else if (VT == MVT::v16i8) {
4292      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
4293      return;
4294    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4295      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
4296      return;
4297    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4298      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
4299      return;
4300    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4301      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
4302      return;
4303    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4304      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
4305      return;
4306    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4307      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4308      return;
4309    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4310      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
4311      return;
4312    }
4313    break;
4314  }
4315  case AArch64ISD::LD1DUPpost: {
4316    if (VT == MVT::v8i8) {
4317      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
4318      return;
4319    } else if (VT == MVT::v16i8) {
4320      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
4321      return;
4322    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4323      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
4324      return;
4325    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4326      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
4327      return;
4328    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4329      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
4330      return;
4331    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4332      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
4333      return;
4334    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4335      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
4336      return;
4337    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4338      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
4339      return;
4340    }
4341    break;
4342  }
4343  case AArch64ISD::LD2DUPpost: {
4344    if (VT == MVT::v8i8) {
4345      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
4346      return;
4347    } else if (VT == MVT::v16i8) {
4348      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
4349      return;
4350    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4351      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
4352      return;
4353    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4354      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
4355      return;
4356    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4357      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
4358      return;
4359    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4360      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
4361      return;
4362    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4363      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
4364      return;
4365    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4366      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
4367      return;
4368    }
4369    break;
4370  }
4371  case AArch64ISD::LD3DUPpost: {
4372    if (VT == MVT::v8i8) {
4373      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
4374      return;
4375    } else if (VT == MVT::v16i8) {
4376      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
4377      return;
4378    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4379      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
4380      return;
4381    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4382      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
4383      return;
4384    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4385      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
4386      return;
4387    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4388      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
4389      return;
4390    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4391      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
4392      return;
4393    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4394      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
4395      return;
4396    }
4397    break;
4398  }
4399  case AArch64ISD::LD4DUPpost: {
4400    if (VT == MVT::v8i8) {
4401      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
4402      return;
4403    } else if (VT == MVT::v16i8) {
4404      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
4405      return;
4406    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4407      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
4408      return;
4409    } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
4410      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
4411      return;
4412    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4413      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
4414      return;
4415    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4416      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
4417      return;
4418    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4419      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
4420      return;
4421    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4422      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
4423      return;
4424    }
4425    break;
4426  }
4427  case AArch64ISD::LD1LANEpost: {
4428    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4429      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
4430      return;
4431    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4432               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4433      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
4434      return;
4435    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4436               VT == MVT::v2f32) {
4437      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
4438      return;
4439    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4440               VT == MVT::v1f64) {
4441      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
4442      return;
4443    }
4444    break;
4445  }
4446  case AArch64ISD::LD2LANEpost: {
4447    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4448      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
4449      return;
4450    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4451               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4452      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
4453      return;
4454    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4455               VT == MVT::v2f32) {
4456      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
4457      return;
4458    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4459               VT == MVT::v1f64) {
4460      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
4461      return;
4462    }
4463    break;
4464  }
4465  case AArch64ISD::LD3LANEpost: {
4466    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4467      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
4468      return;
4469    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4470               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4471      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
4472      return;
4473    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4474               VT == MVT::v2f32) {
4475      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
4476      return;
4477    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4478               VT == MVT::v1f64) {
4479      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
4480      return;
4481    }
4482    break;
4483  }
4484  case AArch64ISD::LD4LANEpost: {
4485    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4486      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
4487      return;
4488    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4489               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4490      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
4491      return;
4492    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4493               VT == MVT::v2f32) {
4494      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
4495      return;
4496    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4497               VT == MVT::v1f64) {
4498      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
4499      return;
4500    }
4501    break;
4502  }
4503  case AArch64ISD::ST2post: {
4504    VT = Node->getOperand(1).getValueType();
4505    if (VT == MVT::v8i8) {
4506      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
4507      return;
4508    } else if (VT == MVT::v16i8) {
4509      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
4510      return;
4511    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4512      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
4513      return;
4514    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4515      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
4516      return;
4517    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4518      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
4519      return;
4520    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4521      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
4522      return;
4523    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4524      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
4525      return;
4526    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4527      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4528      return;
4529    }
4530    break;
4531  }
4532  case AArch64ISD::ST3post: {
4533    VT = Node->getOperand(1).getValueType();
4534    if (VT == MVT::v8i8) {
4535      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4536      return;
4537    } else if (VT == MVT::v16i8) {
4538      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4539      return;
4540    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4541      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4542      return;
4543    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4544      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4545      return;
4546    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4547      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4548      return;
4549    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4550      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4551      return;
4552    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4553      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4554      return;
4555    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4556      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4557      return;
4558    }
4559    break;
4560  }
4561  case AArch64ISD::ST4post: {
4562    VT = Node->getOperand(1).getValueType();
4563    if (VT == MVT::v8i8) {
4564      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4565      return;
4566    } else if (VT == MVT::v16i8) {
4567      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4568      return;
4569    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4570      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4571      return;
4572    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4573      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4574      return;
4575    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4576      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4577      return;
4578    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4579      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4580      return;
4581    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4582      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4583      return;
4584    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4585      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4586      return;
4587    }
4588    break;
4589  }
4590  case AArch64ISD::ST1x2post: {
4591    VT = Node->getOperand(1).getValueType();
4592    if (VT == MVT::v8i8) {
4593      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4594      return;
4595    } else if (VT == MVT::v16i8) {
4596      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4597      return;
4598    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4599      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4600      return;
4601    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4602      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4603      return;
4604    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4605      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4606      return;
4607    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4608      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4609      return;
4610    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4611      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4612      return;
4613    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4614      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4615      return;
4616    }
4617    break;
4618  }
4619  case AArch64ISD::ST1x3post: {
4620    VT = Node->getOperand(1).getValueType();
4621    if (VT == MVT::v8i8) {
4622      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4623      return;
4624    } else if (VT == MVT::v16i8) {
4625      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4626      return;
4627    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4628      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4629      return;
4630    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
4631      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4632      return;
4633    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4634      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4635      return;
4636    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4637      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4638      return;
4639    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4640      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4641      return;
4642    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4643      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4644      return;
4645    }
4646    break;
4647  }
4648  case AArch64ISD::ST1x4post: {
4649    VT = Node->getOperand(1).getValueType();
4650    if (VT == MVT::v8i8) {
4651      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4652      return;
4653    } else if (VT == MVT::v16i8) {
4654      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4655      return;
4656    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4657      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4658      return;
4659    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4660      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4661      return;
4662    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4663      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4664      return;
4665    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4666      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4667      return;
4668    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4669      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4670      return;
4671    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4672      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4673      return;
4674    }
4675    break;
4676  }
4677  case AArch64ISD::ST2LANEpost: {
4678    VT = Node->getOperand(1).getValueType();
4679    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4680      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4681      return;
4682    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4683               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4684      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4685      return;
4686    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4687               VT == MVT::v2f32) {
4688      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4689      return;
4690    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4691               VT == MVT::v1f64) {
4692      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4693      return;
4694    }
4695    break;
4696  }
4697  case AArch64ISD::ST3LANEpost: {
4698    VT = Node->getOperand(1).getValueType();
4699    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4700      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4701      return;
4702    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4703               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4704      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4705      return;
4706    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4707               VT == MVT::v2f32) {
4708      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4709      return;
4710    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4711               VT == MVT::v1f64) {
4712      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4713      return;
4714    }
4715    break;
4716  }
4717  case AArch64ISD::ST4LANEpost: {
4718    VT = Node->getOperand(1).getValueType();
4719    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4720      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4721      return;
4722    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4723               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4724      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4725      return;
4726    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4727               VT == MVT::v2f32) {
4728      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4729      return;
4730    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4731               VT == MVT::v1f64) {
4732      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4733      return;
4734    }
4735    break;
4736  }
4737  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
4738    if (VT == MVT::nxv16i8) {
4739      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
4740      return;
4741    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4742               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4743      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
4744      return;
4745    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4746      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
4747      return;
4748    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4749      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
4750      return;
4751    }
4752    break;
4753  }
4754  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
4755    if (VT == MVT::nxv16i8) {
4756      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
4757      return;
4758    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4759               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4760      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
4761      return;
4762    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4763      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
4764      return;
4765    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4766      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
4767      return;
4768    }
4769    break;
4770  }
4771  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
4772    if (VT == MVT::nxv16i8) {
4773      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
4774      return;
4775    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4776               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4777      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
4778      return;
4779    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4780      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
4781      return;
4782    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4783      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
4784      return;
4785    }
4786    break;
4787  }
4788  }
4789
4790  // Select the default instruction
4791  SelectCode(Node);
4792}
4793
4794/// createAArch64ISelDag - This pass converts a legalized DAG into a
4795/// AArch64-specific DAG, ready for instruction scheduling.
4796FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4797                                         CodeGenOpt::Level OptLevel) {
4798  return new AArch64DAGToDAGISel(TM, OptLevel);
4799}
4800
4801/// When \p PredVT is a scalable vector predicate in the form
4802/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
4803/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
4804/// structured vectors (NumVec >1), the output data type is
4805/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
4806/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
4807/// EVT.
4808static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
4809                                                unsigned NumVec) {
4810  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
4811  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
4812    return EVT();
4813
4814  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
4815      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
4816    return EVT();
4817
4818  ElementCount EC = PredVT.getVectorElementCount();
4819  EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
4820  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
4821
4822  return MemVT;
4823}
4824
4825/// Return the EVT of the data associated to a memory operation in \p
4826/// Root. If such EVT cannot be retrived, it returns an invalid EVT.
4827static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
4828  if (isa<MemSDNode>(Root))
4829    return cast<MemSDNode>(Root)->getMemoryVT();
4830
4831  if (isa<MemIntrinsicSDNode>(Root))
4832    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
4833
4834  const unsigned Opcode = Root->getOpcode();
4835  // For custom ISD nodes, we have to look at them individually to extract the
4836  // type of the data moved to/from memory.
4837  switch (Opcode) {
4838  case AArch64ISD::LD1_MERGE_ZERO:
4839  case AArch64ISD::LD1S_MERGE_ZERO:
4840  case AArch64ISD::LDNF1_MERGE_ZERO:
4841  case AArch64ISD::LDNF1S_MERGE_ZERO:
4842    return cast<VTSDNode>(Root->getOperand(3))->getVT();
4843  case AArch64ISD::ST1_PRED:
4844    return cast<VTSDNode>(Root->getOperand(4))->getVT();
4845  case AArch64ISD::SVE_LD2_MERGE_ZERO:
4846    return getPackedVectorTypeFromPredicateType(
4847        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
4848  case AArch64ISD::SVE_LD3_MERGE_ZERO:
4849    return getPackedVectorTypeFromPredicateType(
4850        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
4851  case AArch64ISD::SVE_LD4_MERGE_ZERO:
4852    return getPackedVectorTypeFromPredicateType(
4853        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
4854  default:
4855    break;
4856  }
4857
4858  if (Opcode != ISD::INTRINSIC_VOID)
4859    return EVT();
4860
4861  const unsigned IntNo =
4862      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
4863  if (IntNo != Intrinsic::aarch64_sve_prf)
4864    return EVT();
4865
4866  // We are using an SVE prefetch intrinsic. Type must be inferred
4867  // from the width of the predicate.
4868  return getPackedVectorTypeFromPredicateType(
4869      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
4870}
4871
4872/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
4873/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
4874/// where Root is the memory access using N for its address.
4875template <int64_t Min, int64_t Max>
4876bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
4877                                                   SDValue &Base,
4878                                                   SDValue &OffImm) {
4879  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
4880
4881  if (MemVT == EVT())
4882    return false;
4883
4884  if (N.getOpcode() != ISD::ADD)
4885    return false;
4886
4887  SDValue VScale = N.getOperand(1);
4888  if (VScale.getOpcode() != ISD::VSCALE)
4889    return false;
4890
4891  TypeSize TS = MemVT.getSizeInBits();
4892  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
4893  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
4894
4895  if ((MulImm % MemWidthBytes) != 0)
4896    return false;
4897
4898  int64_t Offset = MulImm / MemWidthBytes;
4899  if (Offset < Min || Offset > Max)
4900    return false;
4901
4902  Base = N.getOperand(0);
4903  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
4904  return true;
4905}
4906
4907/// Select register plus register addressing mode for SVE, with scaled
4908/// offset.
4909bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
4910                                                  SDValue &Base,
4911                                                  SDValue &Offset) {
4912  if (N.getOpcode() != ISD::ADD)
4913    return false;
4914
4915  // Process an ADD node.
4916  const SDValue LHS = N.getOperand(0);
4917  const SDValue RHS = N.getOperand(1);
4918
4919  // 8 bit data does not come with the SHL node, so it is treated
4920  // separately.
4921  if (Scale == 0) {
4922    Base = LHS;
4923    Offset = RHS;
4924    return true;
4925  }
4926
4927  // Check if the RHS is a shift node with a constant.
4928  if (RHS.getOpcode() != ISD::SHL)
4929    return false;
4930
4931  const SDValue ShiftRHS = RHS.getOperand(1);
4932  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
4933    if (C->getZExtValue() == Scale) {
4934      Base = LHS;
4935      Offset = RHS.getOperand(0);
4936      return true;
4937    }
4938
4939  return false;
4940}
4941