1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64MachineFunctionInfo.h"
15#include "AArch64Subtarget.h"
16#include "MCTargetDesc/AArch64AddressingModes.h"
17#include "Utils/AArch64BaseInfo.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/CodeGen/MachineBasicBlock.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineMemOperand.h"
27#include "llvm/CodeGen/MachineModuleInfo.h"
28#include "llvm/CodeGen/MachineOperand.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/StackMaps.h"
31#include "llvm/CodeGen/TargetRegisterInfo.h"
32#include "llvm/CodeGen/TargetSubtargetInfo.h"
33#include "llvm/IR/DebugInfoMetadata.h"
34#include "llvm/IR/DebugLoc.h"
35#include "llvm/IR/GlobalValue.h"
36#include "llvm/MC/MCAsmInfo.h"
37#include "llvm/MC/MCInst.h"
38#include "llvm/MC/MCInstrDesc.h"
39#include "llvm/Support/Casting.h"
40#include "llvm/Support/CodeGen.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Compiler.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/MathExtras.h"
45#include "llvm/Target/TargetMachine.h"
46#include "llvm/Target/TargetOptions.h"
47#include <cassert>
48#include <cstdint>
49#include <iterator>
50#include <utility>
51
52using namespace llvm;
53
54#define GET_INSTRINFO_CTOR_DTOR
55#include "AArch64GenInstrInfo.inc"
56
57static cl::opt<unsigned> TBZDisplacementBits(
58    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
59    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60
61static cl::opt<unsigned> CBZDisplacementBits(
62    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
63    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
64
65static cl::opt<unsigned>
66    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
67                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));
68
69AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
70    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
71                          AArch64::CATCHRET),
72      RI(STI.getTargetTriple()), Subtarget(STI) {}
73
74/// GetInstSize - Return the number of bytes of code the specified
75/// instruction may be.  This returns the maximum number of bytes.
76unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
77  const MachineBasicBlock &MBB = *MI.getParent();
78  const MachineFunction *MF = MBB.getParent();
79  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
80
81  {
82    auto Op = MI.getOpcode();
83    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
84      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
85  }
86
87  // Meta-instructions emit no code.
88  if (MI.isMetaInstruction())
89    return 0;
90
91  // FIXME: We currently only handle pseudoinstructions that don't get expanded
92  //        before the assembly printer.
93  unsigned NumBytes = 0;
94  const MCInstrDesc &Desc = MI.getDesc();
95  switch (Desc.getOpcode()) {
96  default:
97    // Anything not explicitly designated otherwise is a normal 4-byte insn.
98    NumBytes = 4;
99    break;
100  case TargetOpcode::STACKMAP:
101    // The upper bound for a stackmap intrinsic is the full length of its shadow
102    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
103    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
104    break;
105  case TargetOpcode::PATCHPOINT:
106    // The size of the patchpoint intrinsic is the number of bytes requested
107    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
108    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
109    break;
110  case AArch64::TLSDESC_CALLSEQ:
111    // This gets lowered to an instruction sequence which takes 16 bytes
112    NumBytes = 16;
113    break;
114  case AArch64::SpeculationBarrierISBDSBEndBB:
115    // This gets lowered to 2 4-byte instructions.
116    NumBytes = 8;
117    break;
118  case AArch64::SpeculationBarrierSBEndBB:
119    // This gets lowered to 1 4-byte instructions.
120    NumBytes = 4;
121    break;
122  case AArch64::JumpTableDest32:
123  case AArch64::JumpTableDest16:
124  case AArch64::JumpTableDest8:
125    NumBytes = 12;
126    break;
127  case AArch64::SPACE:
128    NumBytes = MI.getOperand(1).getImm();
129    break;
130  case TargetOpcode::BUNDLE:
131    NumBytes = getInstBundleLength(MI);
132    break;
133  }
134
135  return NumBytes;
136}
137
138unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
139  unsigned Size = 0;
140  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
141  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
142  while (++I != E && I->isInsideBundle()) {
143    assert(!I->isBundle() && "No nested bundle!");
144    Size += getInstSizeInBytes(*I);
145  }
146  return Size;
147}
148
149static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
150                            SmallVectorImpl<MachineOperand> &Cond) {
151  // Block ends with fall-through condbranch.
152  switch (LastInst->getOpcode()) {
153  default:
154    llvm_unreachable("Unknown branch instruction?");
155  case AArch64::Bcc:
156    Target = LastInst->getOperand(1).getMBB();
157    Cond.push_back(LastInst->getOperand(0));
158    break;
159  case AArch64::CBZW:
160  case AArch64::CBZX:
161  case AArch64::CBNZW:
162  case AArch64::CBNZX:
163    Target = LastInst->getOperand(1).getMBB();
164    Cond.push_back(MachineOperand::CreateImm(-1));
165    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
166    Cond.push_back(LastInst->getOperand(0));
167    break;
168  case AArch64::TBZW:
169  case AArch64::TBZX:
170  case AArch64::TBNZW:
171  case AArch64::TBNZX:
172    Target = LastInst->getOperand(2).getMBB();
173    Cond.push_back(MachineOperand::CreateImm(-1));
174    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
175    Cond.push_back(LastInst->getOperand(0));
176    Cond.push_back(LastInst->getOperand(1));
177  }
178}
179
180static unsigned getBranchDisplacementBits(unsigned Opc) {
181  switch (Opc) {
182  default:
183    llvm_unreachable("unexpected opcode!");
184  case AArch64::B:
185    return 64;
186  case AArch64::TBNZW:
187  case AArch64::TBZW:
188  case AArch64::TBNZX:
189  case AArch64::TBZX:
190    return TBZDisplacementBits;
191  case AArch64::CBNZW:
192  case AArch64::CBZW:
193  case AArch64::CBNZX:
194  case AArch64::CBZX:
195    return CBZDisplacementBits;
196  case AArch64::Bcc:
197    return BCCDisplacementBits;
198  }
199}
200
201bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
202                                             int64_t BrOffset) const {
203  unsigned Bits = getBranchDisplacementBits(BranchOp);
204  assert(Bits >= 3 && "max branch displacement must be enough to jump"
205                      "over conditional branch expansion");
206  return isIntN(Bits, BrOffset / 4);
207}
208
209MachineBasicBlock *
210AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
211  switch (MI.getOpcode()) {
212  default:
213    llvm_unreachable("unexpected opcode!");
214  case AArch64::B:
215    return MI.getOperand(0).getMBB();
216  case AArch64::TBZW:
217  case AArch64::TBNZW:
218  case AArch64::TBZX:
219  case AArch64::TBNZX:
220    return MI.getOperand(2).getMBB();
221  case AArch64::CBZW:
222  case AArch64::CBNZW:
223  case AArch64::CBZX:
224  case AArch64::CBNZX:
225  case AArch64::Bcc:
226    return MI.getOperand(1).getMBB();
227  }
228}
229
230// Branch analysis.
231bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
232                                     MachineBasicBlock *&TBB,
233                                     MachineBasicBlock *&FBB,
234                                     SmallVectorImpl<MachineOperand> &Cond,
235                                     bool AllowModify) const {
236  // If the block has no terminators, it just falls into the block after it.
237  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
238  if (I == MBB.end())
239    return false;
240
241  // Skip over SpeculationBarrierEndBB terminators
242  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
243      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
244    --I;
245  }
246
247  if (!isUnpredicatedTerminator(*I))
248    return false;
249
250  // Get the last instruction in the block.
251  MachineInstr *LastInst = &*I;
252
253  // If there is only one terminator instruction, process it.
254  unsigned LastOpc = LastInst->getOpcode();
255  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
256    if (isUncondBranchOpcode(LastOpc)) {
257      TBB = LastInst->getOperand(0).getMBB();
258      return false;
259    }
260    if (isCondBranchOpcode(LastOpc)) {
261      // Block ends with fall-through condbranch.
262      parseCondBranch(LastInst, TBB, Cond);
263      return false;
264    }
265    return true; // Can't handle indirect branch.
266  }
267
268  // Get the instruction before it if it is a terminator.
269  MachineInstr *SecondLastInst = &*I;
270  unsigned SecondLastOpc = SecondLastInst->getOpcode();
271
272  // If AllowModify is true and the block ends with two or more unconditional
273  // branches, delete all but the first unconditional branch.
274  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
275    while (isUncondBranchOpcode(SecondLastOpc)) {
276      LastInst->eraseFromParent();
277      LastInst = SecondLastInst;
278      LastOpc = LastInst->getOpcode();
279      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
280        // Return now the only terminator is an unconditional branch.
281        TBB = LastInst->getOperand(0).getMBB();
282        return false;
283      } else {
284        SecondLastInst = &*I;
285        SecondLastOpc = SecondLastInst->getOpcode();
286      }
287    }
288  }
289
290  // If there are three terminators, we don't know what sort of block this is.
291  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
292    return true;
293
294  // If the block ends with a B and a Bcc, handle it.
295  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
296    parseCondBranch(SecondLastInst, TBB, Cond);
297    FBB = LastInst->getOperand(0).getMBB();
298    return false;
299  }
300
301  // If the block ends with two unconditional branches, handle it.  The second
302  // one is not executed, so remove it.
303  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
304    TBB = SecondLastInst->getOperand(0).getMBB();
305    I = LastInst;
306    if (AllowModify)
307      I->eraseFromParent();
308    return false;
309  }
310
311  // ...likewise if it ends with an indirect branch followed by an unconditional
312  // branch.
313  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
314    I = LastInst;
315    if (AllowModify)
316      I->eraseFromParent();
317    return true;
318  }
319
320  // Otherwise, can't handle this.
321  return true;
322}
323
324bool AArch64InstrInfo::reverseBranchCondition(
325    SmallVectorImpl<MachineOperand> &Cond) const {
326  if (Cond[0].getImm() != -1) {
327    // Regular Bcc
328    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
329    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
330  } else {
331    // Folded compare-and-branch
332    switch (Cond[1].getImm()) {
333    default:
334      llvm_unreachable("Unknown conditional branch!");
335    case AArch64::CBZW:
336      Cond[1].setImm(AArch64::CBNZW);
337      break;
338    case AArch64::CBNZW:
339      Cond[1].setImm(AArch64::CBZW);
340      break;
341    case AArch64::CBZX:
342      Cond[1].setImm(AArch64::CBNZX);
343      break;
344    case AArch64::CBNZX:
345      Cond[1].setImm(AArch64::CBZX);
346      break;
347    case AArch64::TBZW:
348      Cond[1].setImm(AArch64::TBNZW);
349      break;
350    case AArch64::TBNZW:
351      Cond[1].setImm(AArch64::TBZW);
352      break;
353    case AArch64::TBZX:
354      Cond[1].setImm(AArch64::TBNZX);
355      break;
356    case AArch64::TBNZX:
357      Cond[1].setImm(AArch64::TBZX);
358      break;
359    }
360  }
361
362  return false;
363}
364
365unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
366                                        int *BytesRemoved) const {
367  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
368  if (I == MBB.end())
369    return 0;
370
371  if (!isUncondBranchOpcode(I->getOpcode()) &&
372      !isCondBranchOpcode(I->getOpcode()))
373    return 0;
374
375  // Remove the branch.
376  I->eraseFromParent();
377
378  I = MBB.end();
379
380  if (I == MBB.begin()) {
381    if (BytesRemoved)
382      *BytesRemoved = 4;
383    return 1;
384  }
385  --I;
386  if (!isCondBranchOpcode(I->getOpcode())) {
387    if (BytesRemoved)
388      *BytesRemoved = 4;
389    return 1;
390  }
391
392  // Remove the branch.
393  I->eraseFromParent();
394  if (BytesRemoved)
395    *BytesRemoved = 8;
396
397  return 2;
398}
399
400void AArch64InstrInfo::instantiateCondBranch(
401    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
402    ArrayRef<MachineOperand> Cond) const {
403  if (Cond[0].getImm() != -1) {
404    // Regular Bcc
405    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
406  } else {
407    // Folded compare-and-branch
408    // Note that we use addOperand instead of addReg to keep the flags.
409    const MachineInstrBuilder MIB =
410        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
411    if (Cond.size() > 3)
412      MIB.addImm(Cond[3].getImm());
413    MIB.addMBB(TBB);
414  }
415}
416
417unsigned AArch64InstrInfo::insertBranch(
418    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
419    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
420  // Shouldn't be a fall through.
421  assert(TBB && "insertBranch must not be told to insert a fallthrough");
422
423  if (!FBB) {
424    if (Cond.empty()) // Unconditional branch?
425      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
426    else
427      instantiateCondBranch(MBB, DL, TBB, Cond);
428
429    if (BytesAdded)
430      *BytesAdded = 4;
431
432    return 1;
433  }
434
435  // Two-way conditional branch.
436  instantiateCondBranch(MBB, DL, TBB, Cond);
437  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
438
439  if (BytesAdded)
440    *BytesAdded = 8;
441
442  return 2;
443}
444
445// Find the original register that VReg is copied from.
446static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
447  while (Register::isVirtualRegister(VReg)) {
448    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
449    if (!DefMI->isFullCopy())
450      return VReg;
451    VReg = DefMI->getOperand(1).getReg();
452  }
453  return VReg;
454}
455
456// Determine if VReg is defined by an instruction that can be folded into a
457// csel instruction. If so, return the folded opcode, and the replacement
458// register.
459static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
460                                unsigned *NewVReg = nullptr) {
461  VReg = removeCopies(MRI, VReg);
462  if (!Register::isVirtualRegister(VReg))
463    return 0;
464
465  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
466  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
467  unsigned Opc = 0;
468  unsigned SrcOpNum = 0;
469  switch (DefMI->getOpcode()) {
470  case AArch64::ADDSXri:
471  case AArch64::ADDSWri:
472    // if NZCV is used, do not fold.
473    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
474      return 0;
475    // fall-through to ADDXri and ADDWri.
476    LLVM_FALLTHROUGH;
477  case AArch64::ADDXri:
478  case AArch64::ADDWri:
479    // add x, 1 -> csinc.
480    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
481        DefMI->getOperand(3).getImm() != 0)
482      return 0;
483    SrcOpNum = 1;
484    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
485    break;
486
487  case AArch64::ORNXrr:
488  case AArch64::ORNWrr: {
489    // not x -> csinv, represented as orn dst, xzr, src.
490    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
491    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
492      return 0;
493    SrcOpNum = 2;
494    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
495    break;
496  }
497
498  case AArch64::SUBSXrr:
499  case AArch64::SUBSWrr:
500    // if NZCV is used, do not fold.
501    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
502      return 0;
503    // fall-through to SUBXrr and SUBWrr.
504    LLVM_FALLTHROUGH;
505  case AArch64::SUBXrr:
506  case AArch64::SUBWrr: {
507    // neg x -> csneg, represented as sub dst, xzr, src.
508    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
509    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
510      return 0;
511    SrcOpNum = 2;
512    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
513    break;
514  }
515  default:
516    return 0;
517  }
518  assert(Opc && SrcOpNum && "Missing parameters");
519
520  if (NewVReg)
521    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
522  return Opc;
523}
524
525bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
526                                       ArrayRef<MachineOperand> Cond,
527                                       Register DstReg, Register TrueReg,
528                                       Register FalseReg, int &CondCycles,
529                                       int &TrueCycles,
530                                       int &FalseCycles) const {
531  // Check register classes.
532  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
533  const TargetRegisterClass *RC =
534      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
535  if (!RC)
536    return false;
537
538  // Also need to check the dest regclass, in case we're trying to optimize
539  // something like:
540  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
541  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
542    return false;
543
544  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
545  unsigned ExtraCondLat = Cond.size() != 1;
546
547  // GPRs are handled by csel.
548  // FIXME: Fold in x+1, -x, and ~x when applicable.
549  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
550      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
551    // Single-cycle csel, csinc, csinv, and csneg.
552    CondCycles = 1 + ExtraCondLat;
553    TrueCycles = FalseCycles = 1;
554    if (canFoldIntoCSel(MRI, TrueReg))
555      TrueCycles = 0;
556    else if (canFoldIntoCSel(MRI, FalseReg))
557      FalseCycles = 0;
558    return true;
559  }
560
561  // Scalar floating point is handled by fcsel.
562  // FIXME: Form fabs, fmin, and fmax when applicable.
563  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
564      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
565    CondCycles = 5 + ExtraCondLat;
566    TrueCycles = FalseCycles = 2;
567    return true;
568  }
569
570  // Can't do vectors.
571  return false;
572}
573
574void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
575                                    MachineBasicBlock::iterator I,
576                                    const DebugLoc &DL, Register DstReg,
577                                    ArrayRef<MachineOperand> Cond,
578                                    Register TrueReg, Register FalseReg) const {
579  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
580
581  // Parse the condition code, see parseCondBranch() above.
582  AArch64CC::CondCode CC;
583  switch (Cond.size()) {
584  default:
585    llvm_unreachable("Unknown condition opcode in Cond");
586  case 1: // b.cc
587    CC = AArch64CC::CondCode(Cond[0].getImm());
588    break;
589  case 3: { // cbz/cbnz
590    // We must insert a compare against 0.
591    bool Is64Bit;
592    switch (Cond[1].getImm()) {
593    default:
594      llvm_unreachable("Unknown branch opcode in Cond");
595    case AArch64::CBZW:
596      Is64Bit = false;
597      CC = AArch64CC::EQ;
598      break;
599    case AArch64::CBZX:
600      Is64Bit = true;
601      CC = AArch64CC::EQ;
602      break;
603    case AArch64::CBNZW:
604      Is64Bit = false;
605      CC = AArch64CC::NE;
606      break;
607    case AArch64::CBNZX:
608      Is64Bit = true;
609      CC = AArch64CC::NE;
610      break;
611    }
612    Register SrcReg = Cond[2].getReg();
613    if (Is64Bit) {
614      // cmp reg, #0 is actually subs xzr, reg, #0.
615      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
616      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
617          .addReg(SrcReg)
618          .addImm(0)
619          .addImm(0);
620    } else {
621      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
622      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
623          .addReg(SrcReg)
624          .addImm(0)
625          .addImm(0);
626    }
627    break;
628  }
629  case 4: { // tbz/tbnz
630    // We must insert a tst instruction.
631    switch (Cond[1].getImm()) {
632    default:
633      llvm_unreachable("Unknown branch opcode in Cond");
634    case AArch64::TBZW:
635    case AArch64::TBZX:
636      CC = AArch64CC::EQ;
637      break;
638    case AArch64::TBNZW:
639    case AArch64::TBNZX:
640      CC = AArch64CC::NE;
641      break;
642    }
643    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
644    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
645      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
646          .addReg(Cond[2].getReg())
647          .addImm(
648              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
649    else
650      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
651          .addReg(Cond[2].getReg())
652          .addImm(
653              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
654    break;
655  }
656  }
657
658  unsigned Opc = 0;
659  const TargetRegisterClass *RC = nullptr;
660  bool TryFold = false;
661  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
662    RC = &AArch64::GPR64RegClass;
663    Opc = AArch64::CSELXr;
664    TryFold = true;
665  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
666    RC = &AArch64::GPR32RegClass;
667    Opc = AArch64::CSELWr;
668    TryFold = true;
669  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
670    RC = &AArch64::FPR64RegClass;
671    Opc = AArch64::FCSELDrrr;
672  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
673    RC = &AArch64::FPR32RegClass;
674    Opc = AArch64::FCSELSrrr;
675  }
676  assert(RC && "Unsupported regclass");
677
678  // Try folding simple instructions into the csel.
679  if (TryFold) {
680    unsigned NewVReg = 0;
681    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
682    if (FoldedOpc) {
683      // The folded opcodes csinc, csinc and csneg apply the operation to
684      // FalseReg, so we need to invert the condition.
685      CC = AArch64CC::getInvertedCondCode(CC);
686      TrueReg = FalseReg;
687    } else
688      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
689
690    // Fold the operation. Leave any dead instructions for DCE to clean up.
691    if (FoldedOpc) {
692      FalseReg = NewVReg;
693      Opc = FoldedOpc;
694      // The extends the live range of NewVReg.
695      MRI.clearKillFlags(NewVReg);
696    }
697  }
698
699  // Pull all virtual register into the appropriate class.
700  MRI.constrainRegClass(TrueReg, RC);
701  MRI.constrainRegClass(FalseReg, RC);
702
703  // Insert the csel.
704  BuildMI(MBB, I, DL, get(Opc), DstReg)
705      .addReg(TrueReg)
706      .addReg(FalseReg)
707      .addImm(CC);
708}
709
710/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
711static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
712  uint64_t Imm = MI.getOperand(1).getImm();
713  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
714  uint64_t Encoding;
715  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
716}
717
718// FIXME: this implementation should be micro-architecture dependent, so a
719// micro-architecture target hook should be introduced here in future.
720bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
721  if (!Subtarget.hasCustomCheapAsMoveHandling())
722    return MI.isAsCheapAsAMove();
723
724  const unsigned Opcode = MI.getOpcode();
725
726  // Firstly, check cases gated by features.
727
728  if (Subtarget.hasZeroCycleZeroingFP()) {
729    if (Opcode == AArch64::FMOVH0 ||
730        Opcode == AArch64::FMOVS0 ||
731        Opcode == AArch64::FMOVD0)
732      return true;
733  }
734
735  if (Subtarget.hasZeroCycleZeroingGP()) {
736    if (Opcode == TargetOpcode::COPY &&
737        (MI.getOperand(1).getReg() == AArch64::WZR ||
738         MI.getOperand(1).getReg() == AArch64::XZR))
739      return true;
740  }
741
742  // Secondly, check cases specific to sub-targets.
743
744  if (Subtarget.hasExynosCheapAsMoveHandling()) {
745    if (isExynosCheapAsMove(MI))
746      return true;
747
748    return MI.isAsCheapAsAMove();
749  }
750
751  // Finally, check generic cases.
752
753  switch (Opcode) {
754  default:
755    return false;
756
757  // add/sub on register without shift
758  case AArch64::ADDWri:
759  case AArch64::ADDXri:
760  case AArch64::SUBWri:
761  case AArch64::SUBXri:
762    return (MI.getOperand(3).getImm() == 0);
763
764  // logical ops on immediate
765  case AArch64::ANDWri:
766  case AArch64::ANDXri:
767  case AArch64::EORWri:
768  case AArch64::EORXri:
769  case AArch64::ORRWri:
770  case AArch64::ORRXri:
771    return true;
772
773  // logical ops on register without shift
774  case AArch64::ANDWrr:
775  case AArch64::ANDXrr:
776  case AArch64::BICWrr:
777  case AArch64::BICXrr:
778  case AArch64::EONWrr:
779  case AArch64::EONXrr:
780  case AArch64::EORWrr:
781  case AArch64::EORXrr:
782  case AArch64::ORNWrr:
783  case AArch64::ORNXrr:
784  case AArch64::ORRWrr:
785  case AArch64::ORRXrr:
786    return true;
787
788  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
789  // ORRXri, it is as cheap as MOV
790  case AArch64::MOVi32imm:
791    return canBeExpandedToORR(MI, 32);
792  case AArch64::MOVi64imm:
793    return canBeExpandedToORR(MI, 64);
794  }
795
796  llvm_unreachable("Unknown opcode to check as cheap as a move!");
797}
798
799bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
800  switch (MI.getOpcode()) {
801  default:
802    return false;
803
804  case AArch64::ADDWrs:
805  case AArch64::ADDXrs:
806  case AArch64::ADDSWrs:
807  case AArch64::ADDSXrs: {
808    unsigned Imm = MI.getOperand(3).getImm();
809    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
810    if (ShiftVal == 0)
811      return true;
812    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
813  }
814
815  case AArch64::ADDWrx:
816  case AArch64::ADDXrx:
817  case AArch64::ADDXrx64:
818  case AArch64::ADDSWrx:
819  case AArch64::ADDSXrx:
820  case AArch64::ADDSXrx64: {
821    unsigned Imm = MI.getOperand(3).getImm();
822    switch (AArch64_AM::getArithExtendType(Imm)) {
823    default:
824      return false;
825    case AArch64_AM::UXTB:
826    case AArch64_AM::UXTH:
827    case AArch64_AM::UXTW:
828    case AArch64_AM::UXTX:
829      return AArch64_AM::getArithShiftValue(Imm) <= 4;
830    }
831  }
832
833  case AArch64::SUBWrs:
834  case AArch64::SUBSWrs: {
835    unsigned Imm = MI.getOperand(3).getImm();
836    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
837    return ShiftVal == 0 ||
838           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
839  }
840
841  case AArch64::SUBXrs:
842  case AArch64::SUBSXrs: {
843    unsigned Imm = MI.getOperand(3).getImm();
844    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
845    return ShiftVal == 0 ||
846           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
847  }
848
849  case AArch64::SUBWrx:
850  case AArch64::SUBXrx:
851  case AArch64::SUBXrx64:
852  case AArch64::SUBSWrx:
853  case AArch64::SUBSXrx:
854  case AArch64::SUBSXrx64: {
855    unsigned Imm = MI.getOperand(3).getImm();
856    switch (AArch64_AM::getArithExtendType(Imm)) {
857    default:
858      return false;
859    case AArch64_AM::UXTB:
860    case AArch64_AM::UXTH:
861    case AArch64_AM::UXTW:
862    case AArch64_AM::UXTX:
863      return AArch64_AM::getArithShiftValue(Imm) == 0;
864    }
865  }
866
867  case AArch64::LDRBBroW:
868  case AArch64::LDRBBroX:
869  case AArch64::LDRBroW:
870  case AArch64::LDRBroX:
871  case AArch64::LDRDroW:
872  case AArch64::LDRDroX:
873  case AArch64::LDRHHroW:
874  case AArch64::LDRHHroX:
875  case AArch64::LDRHroW:
876  case AArch64::LDRHroX:
877  case AArch64::LDRQroW:
878  case AArch64::LDRQroX:
879  case AArch64::LDRSBWroW:
880  case AArch64::LDRSBWroX:
881  case AArch64::LDRSBXroW:
882  case AArch64::LDRSBXroX:
883  case AArch64::LDRSHWroW:
884  case AArch64::LDRSHWroX:
885  case AArch64::LDRSHXroW:
886  case AArch64::LDRSHXroX:
887  case AArch64::LDRSWroW:
888  case AArch64::LDRSWroX:
889  case AArch64::LDRSroW:
890  case AArch64::LDRSroX:
891  case AArch64::LDRWroW:
892  case AArch64::LDRWroX:
893  case AArch64::LDRXroW:
894  case AArch64::LDRXroX:
895  case AArch64::PRFMroW:
896  case AArch64::PRFMroX:
897  case AArch64::STRBBroW:
898  case AArch64::STRBBroX:
899  case AArch64::STRBroW:
900  case AArch64::STRBroX:
901  case AArch64::STRDroW:
902  case AArch64::STRDroX:
903  case AArch64::STRHHroW:
904  case AArch64::STRHHroX:
905  case AArch64::STRHroW:
906  case AArch64::STRHroX:
907  case AArch64::STRQroW:
908  case AArch64::STRQroX:
909  case AArch64::STRSroW:
910  case AArch64::STRSroX:
911  case AArch64::STRWroW:
912  case AArch64::STRWroX:
913  case AArch64::STRXroW:
914  case AArch64::STRXroX: {
915    unsigned IsSigned = MI.getOperand(3).getImm();
916    return !IsSigned;
917  }
918  }
919}
920
921bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
922  unsigned Opc = MI.getOpcode();
923  switch (Opc) {
924    default:
925      return false;
926    case AArch64::SEH_StackAlloc:
927    case AArch64::SEH_SaveFPLR:
928    case AArch64::SEH_SaveFPLR_X:
929    case AArch64::SEH_SaveReg:
930    case AArch64::SEH_SaveReg_X:
931    case AArch64::SEH_SaveRegP:
932    case AArch64::SEH_SaveRegP_X:
933    case AArch64::SEH_SaveFReg:
934    case AArch64::SEH_SaveFReg_X:
935    case AArch64::SEH_SaveFRegP:
936    case AArch64::SEH_SaveFRegP_X:
937    case AArch64::SEH_SetFP:
938    case AArch64::SEH_AddFP:
939    case AArch64::SEH_Nop:
940    case AArch64::SEH_PrologEnd:
941    case AArch64::SEH_EpilogStart:
942    case AArch64::SEH_EpilogEnd:
943      return true;
944  }
945}
946
947bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
948                                             Register &SrcReg, Register &DstReg,
949                                             unsigned &SubIdx) const {
950  switch (MI.getOpcode()) {
951  default:
952    return false;
953  case AArch64::SBFMXri: // aka sxtw
954  case AArch64::UBFMXri: // aka uxtw
955    // Check for the 32 -> 64 bit extension case, these instructions can do
956    // much more.
957    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
958      return false;
959    // This is a signed or unsigned 32 -> 64 bit extension.
960    SrcReg = MI.getOperand(1).getReg();
961    DstReg = MI.getOperand(0).getReg();
962    SubIdx = AArch64::sub_32;
963    return true;
964  }
965}
966
967bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
968    const MachineInstr &MIa, const MachineInstr &MIb) const {
969  const TargetRegisterInfo *TRI = &getRegisterInfo();
970  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
971  int64_t OffsetA = 0, OffsetB = 0;
972  unsigned WidthA = 0, WidthB = 0;
973  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
974
975  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
976  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
977
978  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
979      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
980    return false;
981
982  // Retrieve the base, offset from the base and width. Width
983  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
984  // base are identical, and the offset of a lower memory access +
985  // the width doesn't overlap the offset of a higher memory access,
986  // then the memory accesses are different.
987  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
988  // are assumed to have the same scale (vscale).
989  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
990                                   WidthA, TRI) &&
991      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
992                                   WidthB, TRI)) {
993    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
994        OffsetAIsScalable == OffsetBIsScalable) {
995      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
996      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
997      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
998      if (LowOffset + LowWidth <= HighOffset)
999        return true;
1000    }
1001  }
1002  return false;
1003}
1004
1005bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1006                                            const MachineBasicBlock *MBB,
1007                                            const MachineFunction &MF) const {
1008  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1009    return true;
1010  switch (MI.getOpcode()) {
1011  case AArch64::HINT:
1012    // CSDB hints are scheduling barriers.
1013    if (MI.getOperand(0).getImm() == 0x14)
1014      return true;
1015    break;
1016  case AArch64::DSB:
1017  case AArch64::ISB:
1018    // DSB and ISB also are scheduling barriers.
1019    return true;
1020  default:;
1021  }
1022  return isSEHInstruction(MI);
1023}
1024
1025/// analyzeCompare - For a comparison instruction, return the source registers
1026/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1027/// Return true if the comparison instruction can be analyzed.
1028bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1029                                      Register &SrcReg2, int &CmpMask,
1030                                      int &CmpValue) const {
1031  // The first operand can be a frame index where we'd normally expect a
1032  // register.
1033  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1034  if (!MI.getOperand(1).isReg())
1035    return false;
1036
1037  switch (MI.getOpcode()) {
1038  default:
1039    break;
1040  case AArch64::SUBSWrr:
1041  case AArch64::SUBSWrs:
1042  case AArch64::SUBSWrx:
1043  case AArch64::SUBSXrr:
1044  case AArch64::SUBSXrs:
1045  case AArch64::SUBSXrx:
1046  case AArch64::ADDSWrr:
1047  case AArch64::ADDSWrs:
1048  case AArch64::ADDSWrx:
1049  case AArch64::ADDSXrr:
1050  case AArch64::ADDSXrs:
1051  case AArch64::ADDSXrx:
1052    // Replace SUBSWrr with SUBWrr if NZCV is not used.
1053    SrcReg = MI.getOperand(1).getReg();
1054    SrcReg2 = MI.getOperand(2).getReg();
1055    CmpMask = ~0;
1056    CmpValue = 0;
1057    return true;
1058  case AArch64::SUBSWri:
1059  case AArch64::ADDSWri:
1060  case AArch64::SUBSXri:
1061  case AArch64::ADDSXri:
1062    SrcReg = MI.getOperand(1).getReg();
1063    SrcReg2 = 0;
1064    CmpMask = ~0;
1065    // FIXME: In order to convert CmpValue to 0 or 1
1066    CmpValue = MI.getOperand(2).getImm() != 0;
1067    return true;
1068  case AArch64::ANDSWri:
1069  case AArch64::ANDSXri:
1070    // ANDS does not use the same encoding scheme as the others xxxS
1071    // instructions.
1072    SrcReg = MI.getOperand(1).getReg();
1073    SrcReg2 = 0;
1074    CmpMask = ~0;
1075    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1076    // while the type of CmpValue is int. When converting uint64_t to int,
1077    // the high 32 bits of uint64_t will be lost.
1078    // In fact it causes a bug in spec2006-483.xalancbmk
1079    // CmpValue is only used to compare with zero in OptimizeCompareInstr
1080    CmpValue = AArch64_AM::decodeLogicalImmediate(
1081                   MI.getOperand(2).getImm(),
1082                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1083    return true;
1084  }
1085
1086  return false;
1087}
1088
1089static bool UpdateOperandRegClass(MachineInstr &Instr) {
1090  MachineBasicBlock *MBB = Instr.getParent();
1091  assert(MBB && "Can't get MachineBasicBlock here");
1092  MachineFunction *MF = MBB->getParent();
1093  assert(MF && "Can't get MachineFunction here");
1094  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1095  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1096  MachineRegisterInfo *MRI = &MF->getRegInfo();
1097
1098  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1099       ++OpIdx) {
1100    MachineOperand &MO = Instr.getOperand(OpIdx);
1101    const TargetRegisterClass *OpRegCstraints =
1102        Instr.getRegClassConstraint(OpIdx, TII, TRI);
1103
1104    // If there's no constraint, there's nothing to do.
1105    if (!OpRegCstraints)
1106      continue;
1107    // If the operand is a frame index, there's nothing to do here.
1108    // A frame index operand will resolve correctly during PEI.
1109    if (MO.isFI())
1110      continue;
1111
1112    assert(MO.isReg() &&
1113           "Operand has register constraints without being a register!");
1114
1115    Register Reg = MO.getReg();
1116    if (Register::isPhysicalRegister(Reg)) {
1117      if (!OpRegCstraints->contains(Reg))
1118        return false;
1119    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1120               !MRI->constrainRegClass(Reg, OpRegCstraints))
1121      return false;
1122  }
1123
1124  return true;
1125}
1126
1127/// Return the opcode that does not set flags when possible - otherwise
1128/// return the original opcode. The caller is responsible to do the actual
1129/// substitution and legality checking.
1130static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1131  // Don't convert all compare instructions, because for some the zero register
1132  // encoding becomes the sp register.
1133  bool MIDefinesZeroReg = false;
1134  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1135    MIDefinesZeroReg = true;
1136
1137  switch (MI.getOpcode()) {
1138  default:
1139    return MI.getOpcode();
1140  case AArch64::ADDSWrr:
1141    return AArch64::ADDWrr;
1142  case AArch64::ADDSWri:
1143    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1144  case AArch64::ADDSWrs:
1145    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1146  case AArch64::ADDSWrx:
1147    return AArch64::ADDWrx;
1148  case AArch64::ADDSXrr:
1149    return AArch64::ADDXrr;
1150  case AArch64::ADDSXri:
1151    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1152  case AArch64::ADDSXrs:
1153    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1154  case AArch64::ADDSXrx:
1155    return AArch64::ADDXrx;
1156  case AArch64::SUBSWrr:
1157    return AArch64::SUBWrr;
1158  case AArch64::SUBSWri:
1159    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1160  case AArch64::SUBSWrs:
1161    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1162  case AArch64::SUBSWrx:
1163    return AArch64::SUBWrx;
1164  case AArch64::SUBSXrr:
1165    return AArch64::SUBXrr;
1166  case AArch64::SUBSXri:
1167    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1168  case AArch64::SUBSXrs:
1169    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1170  case AArch64::SUBSXrx:
1171    return AArch64::SUBXrx;
1172  }
1173}
1174
1175enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1176
1177/// True when condition flags are accessed (either by writing or reading)
1178/// on the instruction trace starting at From and ending at To.
1179///
1180/// Note: If From and To are from different blocks it's assumed CC are accessed
1181///       on the path.
1182static bool areCFlagsAccessedBetweenInstrs(
1183    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1184    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1185  // Early exit if To is at the beginning of the BB.
1186  if (To == To->getParent()->begin())
1187    return true;
1188
1189  // Check whether the instructions are in the same basic block
1190  // If not, assume the condition flags might get modified somewhere.
1191  if (To->getParent() != From->getParent())
1192    return true;
1193
1194  // From must be above To.
1195  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1196                      [From](MachineInstr &MI) {
1197                        return MI.getIterator() == From;
1198                      }) != To->getParent()->rend());
1199
1200  // We iterate backward starting at \p To until we hit \p From.
1201  for (const MachineInstr &Instr :
1202       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1203    if (((AccessToCheck & AK_Write) &&
1204         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1205        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1206      return true;
1207  }
1208  return false;
1209}
1210
1211/// Try to optimize a compare instruction. A compare instruction is an
1212/// instruction which produces AArch64::NZCV. It can be truly compare
1213/// instruction
1214/// when there are no uses of its destination register.
1215///
1216/// The following steps are tried in order:
1217/// 1. Convert CmpInstr into an unconditional version.
1218/// 2. Remove CmpInstr if above there is an instruction producing a needed
1219///    condition code or an instruction which can be converted into such an
1220///    instruction.
1221///    Only comparison with zero is supported.
1222bool AArch64InstrInfo::optimizeCompareInstr(
1223    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
1224    int CmpValue, const MachineRegisterInfo *MRI) const {
1225  assert(CmpInstr.getParent());
1226  assert(MRI);
1227
1228  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1229  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1230  if (DeadNZCVIdx != -1) {
1231    if (CmpInstr.definesRegister(AArch64::WZR) ||
1232        CmpInstr.definesRegister(AArch64::XZR)) {
1233      CmpInstr.eraseFromParent();
1234      return true;
1235    }
1236    unsigned Opc = CmpInstr.getOpcode();
1237    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1238    if (NewOpc == Opc)
1239      return false;
1240    const MCInstrDesc &MCID = get(NewOpc);
1241    CmpInstr.setDesc(MCID);
1242    CmpInstr.RemoveOperand(DeadNZCVIdx);
1243    bool succeeded = UpdateOperandRegClass(CmpInstr);
1244    (void)succeeded;
1245    assert(succeeded && "Some operands reg class are incompatible!");
1246    return true;
1247  }
1248
1249  // Continue only if we have a "ri" where immediate is zero.
1250  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1251  // function.
1252  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1253  if (CmpValue != 0 || SrcReg2 != 0)
1254    return false;
1255
1256  // CmpInstr is a Compare instruction if destination register is not used.
1257  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1258    return false;
1259
1260  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1261}
1262
1263/// Get opcode of S version of Instr.
1264/// If Instr is S version its opcode is returned.
1265/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1266/// or we are not interested in it.
1267static unsigned sForm(MachineInstr &Instr) {
1268  switch (Instr.getOpcode()) {
1269  default:
1270    return AArch64::INSTRUCTION_LIST_END;
1271
1272  case AArch64::ADDSWrr:
1273  case AArch64::ADDSWri:
1274  case AArch64::ADDSXrr:
1275  case AArch64::ADDSXri:
1276  case AArch64::SUBSWrr:
1277  case AArch64::SUBSWri:
1278  case AArch64::SUBSXrr:
1279  case AArch64::SUBSXri:
1280    return Instr.getOpcode();
1281
1282  case AArch64::ADDWrr:
1283    return AArch64::ADDSWrr;
1284  case AArch64::ADDWri:
1285    return AArch64::ADDSWri;
1286  case AArch64::ADDXrr:
1287    return AArch64::ADDSXrr;
1288  case AArch64::ADDXri:
1289    return AArch64::ADDSXri;
1290  case AArch64::ADCWr:
1291    return AArch64::ADCSWr;
1292  case AArch64::ADCXr:
1293    return AArch64::ADCSXr;
1294  case AArch64::SUBWrr:
1295    return AArch64::SUBSWrr;
1296  case AArch64::SUBWri:
1297    return AArch64::SUBSWri;
1298  case AArch64::SUBXrr:
1299    return AArch64::SUBSXrr;
1300  case AArch64::SUBXri:
1301    return AArch64::SUBSXri;
1302  case AArch64::SBCWr:
1303    return AArch64::SBCSWr;
1304  case AArch64::SBCXr:
1305    return AArch64::SBCSXr;
1306  case AArch64::ANDWri:
1307    return AArch64::ANDSWri;
1308  case AArch64::ANDXri:
1309    return AArch64::ANDSXri;
1310  }
1311}
1312
1313/// Check if AArch64::NZCV should be alive in successors of MBB.
1314static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1315  for (auto *BB : MBB->successors())
1316    if (BB->isLiveIn(AArch64::NZCV))
1317      return true;
1318  return false;
1319}
1320
1321namespace {
1322
1323struct UsedNZCV {
1324  bool N = false;
1325  bool Z = false;
1326  bool C = false;
1327  bool V = false;
1328
1329  UsedNZCV() = default;
1330
1331  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1332    this->N |= UsedFlags.N;
1333    this->Z |= UsedFlags.Z;
1334    this->C |= UsedFlags.C;
1335    this->V |= UsedFlags.V;
1336    return *this;
1337  }
1338};
1339
1340} // end anonymous namespace
1341
1342/// Find a condition code used by the instruction.
1343/// Returns AArch64CC::Invalid if either the instruction does not use condition
1344/// codes or we don't optimize CmpInstr in the presence of such instructions.
1345static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1346  switch (Instr.getOpcode()) {
1347  default:
1348    return AArch64CC::Invalid;
1349
1350  case AArch64::Bcc: {
1351    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1352    assert(Idx >= 2);
1353    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1354  }
1355
1356  case AArch64::CSINVWr:
1357  case AArch64::CSINVXr:
1358  case AArch64::CSINCWr:
1359  case AArch64::CSINCXr:
1360  case AArch64::CSELWr:
1361  case AArch64::CSELXr:
1362  case AArch64::CSNEGWr:
1363  case AArch64::CSNEGXr:
1364  case AArch64::FCSELSrrr:
1365  case AArch64::FCSELDrrr: {
1366    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1367    assert(Idx >= 1);
1368    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1369  }
1370  }
1371}
1372
1373static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1374  assert(CC != AArch64CC::Invalid);
1375  UsedNZCV UsedFlags;
1376  switch (CC) {
1377  default:
1378    break;
1379
1380  case AArch64CC::EQ: // Z set
1381  case AArch64CC::NE: // Z clear
1382    UsedFlags.Z = true;
1383    break;
1384
1385  case AArch64CC::HI: // Z clear and C set
1386  case AArch64CC::LS: // Z set   or  C clear
1387    UsedFlags.Z = true;
1388    LLVM_FALLTHROUGH;
1389  case AArch64CC::HS: // C set
1390  case AArch64CC::LO: // C clear
1391    UsedFlags.C = true;
1392    break;
1393
1394  case AArch64CC::MI: // N set
1395  case AArch64CC::PL: // N clear
1396    UsedFlags.N = true;
1397    break;
1398
1399  case AArch64CC::VS: // V set
1400  case AArch64CC::VC: // V clear
1401    UsedFlags.V = true;
1402    break;
1403
1404  case AArch64CC::GT: // Z clear, N and V the same
1405  case AArch64CC::LE: // Z set,   N and V differ
1406    UsedFlags.Z = true;
1407    LLVM_FALLTHROUGH;
1408  case AArch64CC::GE: // N and V the same
1409  case AArch64CC::LT: // N and V differ
1410    UsedFlags.N = true;
1411    UsedFlags.V = true;
1412    break;
1413  }
1414  return UsedFlags;
1415}
1416
1417static bool isADDSRegImm(unsigned Opcode) {
1418  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1419}
1420
1421static bool isSUBSRegImm(unsigned Opcode) {
1422  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1423}
1424
1425/// Check if CmpInstr can be substituted by MI.
1426///
1427/// CmpInstr can be substituted:
1428/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1429/// - and, MI and CmpInstr are from the same MachineBB
1430/// - and, condition flags are not alive in successors of the CmpInstr parent
1431/// - and, if MI opcode is the S form there must be no defs of flags between
1432///        MI and CmpInstr
1433///        or if MI opcode is not the S form there must be neither defs of flags
1434///        nor uses of flags between MI and CmpInstr.
1435/// - and  C/V flags are not used after CmpInstr
1436static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1437                                       const TargetRegisterInfo *TRI) {
1438  assert(MI);
1439  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1440  assert(CmpInstr);
1441
1442  const unsigned CmpOpcode = CmpInstr->getOpcode();
1443  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1444    return false;
1445
1446  if (MI->getParent() != CmpInstr->getParent())
1447    return false;
1448
1449  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1450    return false;
1451
1452  AccessKind AccessToCheck = AK_Write;
1453  if (sForm(*MI) != MI->getOpcode())
1454    AccessToCheck = AK_All;
1455  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1456    return false;
1457
1458  UsedNZCV NZCVUsedAfterCmp;
1459  for (const MachineInstr &Instr :
1460       instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
1461                                CmpInstr->getParent()->instr_end())) {
1462    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1463      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1464      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1465        return false;
1466      NZCVUsedAfterCmp |= getUsedNZCV(CC);
1467    }
1468
1469    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1470      break;
1471  }
1472
1473  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1474}
1475
1476/// Substitute an instruction comparing to zero with another instruction
1477/// which produces needed condition flags.
1478///
1479/// Return true on success.
1480bool AArch64InstrInfo::substituteCmpToZero(
1481    MachineInstr &CmpInstr, unsigned SrcReg,
1482    const MachineRegisterInfo *MRI) const {
1483  assert(MRI);
1484  // Get the unique definition of SrcReg.
1485  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1486  if (!MI)
1487    return false;
1488
1489  const TargetRegisterInfo *TRI = &getRegisterInfo();
1490
1491  unsigned NewOpc = sForm(*MI);
1492  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1493    return false;
1494
1495  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1496    return false;
1497
1498  // Update the instruction to set NZCV.
1499  MI->setDesc(get(NewOpc));
1500  CmpInstr.eraseFromParent();
1501  bool succeeded = UpdateOperandRegClass(*MI);
1502  (void)succeeded;
1503  assert(succeeded && "Some operands reg class are incompatible!");
1504  MI->addRegisterDefined(AArch64::NZCV, TRI);
1505  return true;
1506}
1507
1508bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1509  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1510      MI.getOpcode() != AArch64::CATCHRET)
1511    return false;
1512
1513  MachineBasicBlock &MBB = *MI.getParent();
1514  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1515  auto TRI = Subtarget.getRegisterInfo();
1516  DebugLoc DL = MI.getDebugLoc();
1517
1518  if (MI.getOpcode() == AArch64::CATCHRET) {
1519    // Skip to the first instruction before the epilog.
1520    const TargetInstrInfo *TII =
1521      MBB.getParent()->getSubtarget().getInstrInfo();
1522    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1523    auto MBBI = MachineBasicBlock::iterator(MI);
1524    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1525    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1526           FirstEpilogSEH != MBB.begin())
1527      FirstEpilogSEH = std::prev(FirstEpilogSEH);
1528    if (FirstEpilogSEH != MBB.begin())
1529      FirstEpilogSEH = std::next(FirstEpilogSEH);
1530    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1531        .addReg(AArch64::X0, RegState::Define)
1532        .addMBB(TargetMBB);
1533    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1534        .addReg(AArch64::X0, RegState::Define)
1535        .addReg(AArch64::X0)
1536        .addMBB(TargetMBB)
1537        .addImm(0);
1538    return true;
1539  }
1540
1541  Register Reg = MI.getOperand(0).getReg();
1542  const GlobalValue *GV =
1543      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1544  const TargetMachine &TM = MBB.getParent()->getTarget();
1545  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1546  const unsigned char MO_NC = AArch64II::MO_NC;
1547
1548  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1549    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1550        .addGlobalAddress(GV, 0, OpFlags);
1551    if (Subtarget.isTargetILP32()) {
1552      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1553      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1554          .addDef(Reg32, RegState::Dead)
1555          .addUse(Reg, RegState::Kill)
1556          .addImm(0)
1557          .addMemOperand(*MI.memoperands_begin())
1558          .addDef(Reg, RegState::Implicit);
1559    } else {
1560      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1561          .addReg(Reg, RegState::Kill)
1562          .addImm(0)
1563          .addMemOperand(*MI.memoperands_begin());
1564    }
1565  } else if (TM.getCodeModel() == CodeModel::Large) {
1566    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
1567    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1568        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1569        .addImm(0);
1570    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1571        .addReg(Reg, RegState::Kill)
1572        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1573        .addImm(16);
1574    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1575        .addReg(Reg, RegState::Kill)
1576        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1577        .addImm(32);
1578    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1579        .addReg(Reg, RegState::Kill)
1580        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1581        .addImm(48);
1582    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1583        .addReg(Reg, RegState::Kill)
1584        .addImm(0)
1585        .addMemOperand(*MI.memoperands_begin());
1586  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1587    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1588        .addGlobalAddress(GV, 0, OpFlags);
1589  } else {
1590    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1591        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1592    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1593    if (Subtarget.isTargetILP32()) {
1594      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1595      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1596          .addDef(Reg32, RegState::Dead)
1597          .addUse(Reg, RegState::Kill)
1598          .addGlobalAddress(GV, 0, LoFlags)
1599          .addMemOperand(*MI.memoperands_begin())
1600          .addDef(Reg, RegState::Implicit);
1601    } else {
1602      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1603          .addReg(Reg, RegState::Kill)
1604          .addGlobalAddress(GV, 0, LoFlags)
1605          .addMemOperand(*MI.memoperands_begin());
1606    }
1607  }
1608
1609  MBB.erase(MI);
1610
1611  return true;
1612}
1613
1614// Return true if this instruction simply sets its single destination register
1615// to zero. This is equivalent to a register rename of the zero-register.
1616bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1617  switch (MI.getOpcode()) {
1618  default:
1619    break;
1620  case AArch64::MOVZWi:
1621  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1622    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1623      assert(MI.getDesc().getNumOperands() == 3 &&
1624             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1625      return true;
1626    }
1627    break;
1628  case AArch64::ANDWri: // and Rd, Rzr, #imm
1629    return MI.getOperand(1).getReg() == AArch64::WZR;
1630  case AArch64::ANDXri:
1631    return MI.getOperand(1).getReg() == AArch64::XZR;
1632  case TargetOpcode::COPY:
1633    return MI.getOperand(1).getReg() == AArch64::WZR;
1634  }
1635  return false;
1636}
1637
1638// Return true if this instruction simply renames a general register without
1639// modifying bits.
1640bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1641  switch (MI.getOpcode()) {
1642  default:
1643    break;
1644  case TargetOpcode::COPY: {
1645    // GPR32 copies will by lowered to ORRXrs
1646    Register DstReg = MI.getOperand(0).getReg();
1647    return (AArch64::GPR32RegClass.contains(DstReg) ||
1648            AArch64::GPR64RegClass.contains(DstReg));
1649  }
1650  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1651    if (MI.getOperand(1).getReg() == AArch64::XZR) {
1652      assert(MI.getDesc().getNumOperands() == 4 &&
1653             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1654      return true;
1655    }
1656    break;
1657  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1658    if (MI.getOperand(2).getImm() == 0) {
1659      assert(MI.getDesc().getNumOperands() == 4 &&
1660             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1661      return true;
1662    }
1663    break;
1664  }
1665  return false;
1666}
1667
1668// Return true if this instruction simply renames a general register without
1669// modifying bits.
1670bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1671  switch (MI.getOpcode()) {
1672  default:
1673    break;
1674  case TargetOpcode::COPY: {
1675    // FPR64 copies will by lowered to ORR.16b
1676    Register DstReg = MI.getOperand(0).getReg();
1677    return (AArch64::FPR64RegClass.contains(DstReg) ||
1678            AArch64::FPR128RegClass.contains(DstReg));
1679  }
1680  case AArch64::ORRv16i8:
1681    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1682      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1683             "invalid ORRv16i8 operands");
1684      return true;
1685    }
1686    break;
1687  }
1688  return false;
1689}
1690
1691unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1692                                               int &FrameIndex) const {
1693  switch (MI.getOpcode()) {
1694  default:
1695    break;
1696  case AArch64::LDRWui:
1697  case AArch64::LDRXui:
1698  case AArch64::LDRBui:
1699  case AArch64::LDRHui:
1700  case AArch64::LDRSui:
1701  case AArch64::LDRDui:
1702  case AArch64::LDRQui:
1703    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1704        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1705      FrameIndex = MI.getOperand(1).getIndex();
1706      return MI.getOperand(0).getReg();
1707    }
1708    break;
1709  }
1710
1711  return 0;
1712}
1713
1714unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1715                                              int &FrameIndex) const {
1716  switch (MI.getOpcode()) {
1717  default:
1718    break;
1719  case AArch64::STRWui:
1720  case AArch64::STRXui:
1721  case AArch64::STRBui:
1722  case AArch64::STRHui:
1723  case AArch64::STRSui:
1724  case AArch64::STRDui:
1725  case AArch64::STRQui:
1726  case AArch64::LDR_PXI:
1727  case AArch64::STR_PXI:
1728    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1729        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1730      FrameIndex = MI.getOperand(1).getIndex();
1731      return MI.getOperand(0).getReg();
1732    }
1733    break;
1734  }
1735  return 0;
1736}
1737
1738/// Check all MachineMemOperands for a hint to suppress pairing.
1739bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1740  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1741    return MMO->getFlags() & MOSuppressPair;
1742  });
1743}
1744
1745/// Set a flag on the first MachineMemOperand to suppress pairing.
1746void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1747  if (MI.memoperands_empty())
1748    return;
1749  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1750}
1751
1752/// Check all MachineMemOperands for a hint that the load/store is strided.
1753bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1754  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1755    return MMO->getFlags() & MOStridedAccess;
1756  });
1757}
1758
1759bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1760  switch (Opc) {
1761  default:
1762    return false;
1763  case AArch64::STURSi:
1764  case AArch64::STURDi:
1765  case AArch64::STURQi:
1766  case AArch64::STURBBi:
1767  case AArch64::STURHHi:
1768  case AArch64::STURWi:
1769  case AArch64::STURXi:
1770  case AArch64::LDURSi:
1771  case AArch64::LDURDi:
1772  case AArch64::LDURQi:
1773  case AArch64::LDURWi:
1774  case AArch64::LDURXi:
1775  case AArch64::LDURSWi:
1776  case AArch64::LDURHHi:
1777  case AArch64::LDURBBi:
1778  case AArch64::LDURSBWi:
1779  case AArch64::LDURSHWi:
1780    return true;
1781  }
1782}
1783
1784Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1785  switch (Opc) {
1786  default: return {};
1787  case AArch64::PRFMui: return AArch64::PRFUMi;
1788  case AArch64::LDRXui: return AArch64::LDURXi;
1789  case AArch64::LDRWui: return AArch64::LDURWi;
1790  case AArch64::LDRBui: return AArch64::LDURBi;
1791  case AArch64::LDRHui: return AArch64::LDURHi;
1792  case AArch64::LDRSui: return AArch64::LDURSi;
1793  case AArch64::LDRDui: return AArch64::LDURDi;
1794  case AArch64::LDRQui: return AArch64::LDURQi;
1795  case AArch64::LDRBBui: return AArch64::LDURBBi;
1796  case AArch64::LDRHHui: return AArch64::LDURHHi;
1797  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1798  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1799  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1800  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1801  case AArch64::LDRSWui: return AArch64::LDURSWi;
1802  case AArch64::STRXui: return AArch64::STURXi;
1803  case AArch64::STRWui: return AArch64::STURWi;
1804  case AArch64::STRBui: return AArch64::STURBi;
1805  case AArch64::STRHui: return AArch64::STURHi;
1806  case AArch64::STRSui: return AArch64::STURSi;
1807  case AArch64::STRDui: return AArch64::STURDi;
1808  case AArch64::STRQui: return AArch64::STURQi;
1809  case AArch64::STRBBui: return AArch64::STURBBi;
1810  case AArch64::STRHHui: return AArch64::STURHHi;
1811  }
1812}
1813
1814unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1815  switch (Opc) {
1816  default:
1817    return 2;
1818  case AArch64::LDPXi:
1819  case AArch64::LDPDi:
1820  case AArch64::STPXi:
1821  case AArch64::STPDi:
1822  case AArch64::LDNPXi:
1823  case AArch64::LDNPDi:
1824  case AArch64::STNPXi:
1825  case AArch64::STNPDi:
1826  case AArch64::LDPQi:
1827  case AArch64::STPQi:
1828  case AArch64::LDNPQi:
1829  case AArch64::STNPQi:
1830  case AArch64::LDPWi:
1831  case AArch64::LDPSi:
1832  case AArch64::STPWi:
1833  case AArch64::STPSi:
1834  case AArch64::LDNPWi:
1835  case AArch64::LDNPSi:
1836  case AArch64::STNPWi:
1837  case AArch64::STNPSi:
1838  case AArch64::LDG:
1839  case AArch64::STGPi:
1840  case AArch64::LD1B_IMM:
1841  case AArch64::LD1H_IMM:
1842  case AArch64::LD1W_IMM:
1843  case AArch64::LD1D_IMM:
1844  case AArch64::ST1B_IMM:
1845  case AArch64::ST1H_IMM:
1846  case AArch64::ST1W_IMM:
1847  case AArch64::ST1D_IMM:
1848  case AArch64::LD1B_H_IMM:
1849  case AArch64::LD1SB_H_IMM:
1850  case AArch64::LD1H_S_IMM:
1851  case AArch64::LD1SH_S_IMM:
1852  case AArch64::LD1W_D_IMM:
1853  case AArch64::LD1SW_D_IMM:
1854  case AArch64::ST1B_H_IMM:
1855  case AArch64::ST1H_S_IMM:
1856  case AArch64::ST1W_D_IMM:
1857  case AArch64::LD1B_S_IMM:
1858  case AArch64::LD1SB_S_IMM:
1859  case AArch64::LD1H_D_IMM:
1860  case AArch64::LD1SH_D_IMM:
1861  case AArch64::ST1B_S_IMM:
1862  case AArch64::ST1H_D_IMM:
1863  case AArch64::LD1B_D_IMM:
1864  case AArch64::LD1SB_D_IMM:
1865  case AArch64::ST1B_D_IMM:
1866    return 3;
1867  case AArch64::ADDG:
1868  case AArch64::STGOffset:
1869  case AArch64::LDR_PXI:
1870  case AArch64::STR_PXI:
1871    return 2;
1872  }
1873}
1874
1875bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1876  switch (MI.getOpcode()) {
1877  default:
1878    return false;
1879  // Scaled instructions.
1880  case AArch64::STRSui:
1881  case AArch64::STRDui:
1882  case AArch64::STRQui:
1883  case AArch64::STRXui:
1884  case AArch64::STRWui:
1885  case AArch64::LDRSui:
1886  case AArch64::LDRDui:
1887  case AArch64::LDRQui:
1888  case AArch64::LDRXui:
1889  case AArch64::LDRWui:
1890  case AArch64::LDRSWui:
1891  // Unscaled instructions.
1892  case AArch64::STURSi:
1893  case AArch64::STURDi:
1894  case AArch64::STURQi:
1895  case AArch64::STURWi:
1896  case AArch64::STURXi:
1897  case AArch64::LDURSi:
1898  case AArch64::LDURDi:
1899  case AArch64::LDURQi:
1900  case AArch64::LDURWi:
1901  case AArch64::LDURXi:
1902  case AArch64::LDURSWi:
1903    return true;
1904  }
1905}
1906
1907unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1908                                                   bool &Is64Bit) {
1909  switch (Opc) {
1910  default:
1911    llvm_unreachable("Opcode has no flag setting equivalent!");
1912  // 32-bit cases:
1913  case AArch64::ADDWri:
1914    Is64Bit = false;
1915    return AArch64::ADDSWri;
1916  case AArch64::ADDWrr:
1917    Is64Bit = false;
1918    return AArch64::ADDSWrr;
1919  case AArch64::ADDWrs:
1920    Is64Bit = false;
1921    return AArch64::ADDSWrs;
1922  case AArch64::ADDWrx:
1923    Is64Bit = false;
1924    return AArch64::ADDSWrx;
1925  case AArch64::ANDWri:
1926    Is64Bit = false;
1927    return AArch64::ANDSWri;
1928  case AArch64::ANDWrr:
1929    Is64Bit = false;
1930    return AArch64::ANDSWrr;
1931  case AArch64::ANDWrs:
1932    Is64Bit = false;
1933    return AArch64::ANDSWrs;
1934  case AArch64::BICWrr:
1935    Is64Bit = false;
1936    return AArch64::BICSWrr;
1937  case AArch64::BICWrs:
1938    Is64Bit = false;
1939    return AArch64::BICSWrs;
1940  case AArch64::SUBWri:
1941    Is64Bit = false;
1942    return AArch64::SUBSWri;
1943  case AArch64::SUBWrr:
1944    Is64Bit = false;
1945    return AArch64::SUBSWrr;
1946  case AArch64::SUBWrs:
1947    Is64Bit = false;
1948    return AArch64::SUBSWrs;
1949  case AArch64::SUBWrx:
1950    Is64Bit = false;
1951    return AArch64::SUBSWrx;
1952  // 64-bit cases:
1953  case AArch64::ADDXri:
1954    Is64Bit = true;
1955    return AArch64::ADDSXri;
1956  case AArch64::ADDXrr:
1957    Is64Bit = true;
1958    return AArch64::ADDSXrr;
1959  case AArch64::ADDXrs:
1960    Is64Bit = true;
1961    return AArch64::ADDSXrs;
1962  case AArch64::ADDXrx:
1963    Is64Bit = true;
1964    return AArch64::ADDSXrx;
1965  case AArch64::ANDXri:
1966    Is64Bit = true;
1967    return AArch64::ANDSXri;
1968  case AArch64::ANDXrr:
1969    Is64Bit = true;
1970    return AArch64::ANDSXrr;
1971  case AArch64::ANDXrs:
1972    Is64Bit = true;
1973    return AArch64::ANDSXrs;
1974  case AArch64::BICXrr:
1975    Is64Bit = true;
1976    return AArch64::BICSXrr;
1977  case AArch64::BICXrs:
1978    Is64Bit = true;
1979    return AArch64::BICSXrs;
1980  case AArch64::SUBXri:
1981    Is64Bit = true;
1982    return AArch64::SUBSXri;
1983  case AArch64::SUBXrr:
1984    Is64Bit = true;
1985    return AArch64::SUBSXrr;
1986  case AArch64::SUBXrs:
1987    Is64Bit = true;
1988    return AArch64::SUBSXrs;
1989  case AArch64::SUBXrx:
1990    Is64Bit = true;
1991    return AArch64::SUBSXrx;
1992  }
1993}
1994
1995// Is this a candidate for ld/st merging or pairing?  For example, we don't
1996// touch volatiles or load/stores that have a hint to avoid pair formation.
1997bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1998  // If this is a volatile load/store, don't mess with it.
1999  if (MI.hasOrderedMemoryRef())
2000    return false;
2001
2002  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2003  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
2004         "Expected a reg or frame index operand.");
2005  if (!MI.getOperand(2).isImm())
2006    return false;
2007
2008  // Can't merge/pair if the instruction modifies the base register.
2009  // e.g., ldr x0, [x0]
2010  // This case will never occur with an FI base.
2011  if (MI.getOperand(1).isReg()) {
2012    Register BaseReg = MI.getOperand(1).getReg();
2013    const TargetRegisterInfo *TRI = &getRegisterInfo();
2014    if (MI.modifiesRegister(BaseReg, TRI))
2015      return false;
2016  }
2017
2018  // Check if this load/store has a hint to avoid pair formation.
2019  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2020  if (isLdStPairSuppressed(MI))
2021    return false;
2022
2023  // Do not pair any callee-save store/reload instructions in the
2024  // prologue/epilogue if the CFI information encoded the operations as separate
2025  // instructions, as that will cause the size of the actual prologue to mismatch
2026  // with the prologue size recorded in the Windows CFI.
2027  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2028  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2029                     MI.getMF()->getFunction().needsUnwindTableEntry();
2030  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2031                      MI.getFlag(MachineInstr::FrameDestroy)))
2032    return false;
2033
2034  // On some CPUs quad load/store pairs are slower than two single load/stores.
2035  if (Subtarget.isPaired128Slow()) {
2036    switch (MI.getOpcode()) {
2037    default:
2038      break;
2039    case AArch64::LDURQi:
2040    case AArch64::STURQi:
2041    case AArch64::LDRQui:
2042    case AArch64::STRQui:
2043      return false;
2044    }
2045  }
2046
2047  return true;
2048}
2049
2050bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2051    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2052    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2053    const TargetRegisterInfo *TRI) const {
2054  if (!LdSt.mayLoadOrStore())
2055    return false;
2056
2057  const MachineOperand *BaseOp;
2058  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2059                                    Width, TRI))
2060    return false;
2061  BaseOps.push_back(BaseOp);
2062  return true;
2063}
2064
2065bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
2066    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2067    bool &OffsetIsScalable, unsigned &Width,
2068    const TargetRegisterInfo *TRI) const {
2069  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2070  // Handle only loads/stores with base register followed by immediate offset.
2071  if (LdSt.getNumExplicitOperands() == 3) {
2072    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2073    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2074        !LdSt.getOperand(2).isImm())
2075      return false;
2076  } else if (LdSt.getNumExplicitOperands() == 4) {
2077    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2078    if (!LdSt.getOperand(1).isReg() ||
2079        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2080        !LdSt.getOperand(3).isImm())
2081      return false;
2082  } else
2083    return false;
2084
2085  // Get the scaling factor for the instruction and set the width for the
2086  // instruction.
2087  TypeSize Scale(0U, false);
2088  int64_t Dummy1, Dummy2;
2089
2090  // If this returns false, then it's an instruction we don't want to handle.
2091  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2092    return false;
2093
2094  // Compute the offset. Offset is calculated as the immediate operand
2095  // multiplied by the scaling factor. Unscaled instructions have scaling factor
2096  // set to 1.
2097  if (LdSt.getNumExplicitOperands() == 3) {
2098    BaseOp = &LdSt.getOperand(1);
2099    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
2100  } else {
2101    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2102    BaseOp = &LdSt.getOperand(2);
2103    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
2104  }
2105  OffsetIsScalable = Scale.isScalable();
2106
2107  if (!BaseOp->isReg() && !BaseOp->isFI())
2108    return false;
2109
2110  return true;
2111}
2112
2113MachineOperand &
2114AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2115  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2116  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2117  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2118  return OfsOp;
2119}
2120
2121bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
2122                                    unsigned &Width, int64_t &MinOffset,
2123                                    int64_t &MaxOffset) {
2124  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
2125  switch (Opcode) {
2126  // Not a memory operation or something we want to handle.
2127  default:
2128    Scale = TypeSize::Fixed(0);
2129    Width = 0;
2130    MinOffset = MaxOffset = 0;
2131    return false;
2132  case AArch64::STRWpost:
2133  case AArch64::LDRWpost:
2134    Width = 32;
2135    Scale = TypeSize::Fixed(4);
2136    MinOffset = -256;
2137    MaxOffset = 255;
2138    break;
2139  case AArch64::LDURQi:
2140  case AArch64::STURQi:
2141    Width = 16;
2142    Scale = TypeSize::Fixed(1);
2143    MinOffset = -256;
2144    MaxOffset = 255;
2145    break;
2146  case AArch64::PRFUMi:
2147  case AArch64::LDURXi:
2148  case AArch64::LDURDi:
2149  case AArch64::STURXi:
2150  case AArch64::STURDi:
2151    Width = 8;
2152    Scale = TypeSize::Fixed(1);
2153    MinOffset = -256;
2154    MaxOffset = 255;
2155    break;
2156  case AArch64::LDURWi:
2157  case AArch64::LDURSi:
2158  case AArch64::LDURSWi:
2159  case AArch64::STURWi:
2160  case AArch64::STURSi:
2161    Width = 4;
2162    Scale = TypeSize::Fixed(1);
2163    MinOffset = -256;
2164    MaxOffset = 255;
2165    break;
2166  case AArch64::LDURHi:
2167  case AArch64::LDURHHi:
2168  case AArch64::LDURSHXi:
2169  case AArch64::LDURSHWi:
2170  case AArch64::STURHi:
2171  case AArch64::STURHHi:
2172    Width = 2;
2173    Scale = TypeSize::Fixed(1);
2174    MinOffset = -256;
2175    MaxOffset = 255;
2176    break;
2177  case AArch64::LDURBi:
2178  case AArch64::LDURBBi:
2179  case AArch64::LDURSBXi:
2180  case AArch64::LDURSBWi:
2181  case AArch64::STURBi:
2182  case AArch64::STURBBi:
2183    Width = 1;
2184    Scale = TypeSize::Fixed(1);
2185    MinOffset = -256;
2186    MaxOffset = 255;
2187    break;
2188  case AArch64::LDPQi:
2189  case AArch64::LDNPQi:
2190  case AArch64::STPQi:
2191  case AArch64::STNPQi:
2192    Scale = TypeSize::Fixed(16);
2193    Width = 32;
2194    MinOffset = -64;
2195    MaxOffset = 63;
2196    break;
2197  case AArch64::LDRQui:
2198  case AArch64::STRQui:
2199    Scale = TypeSize::Fixed(16);
2200    Width = 16;
2201    MinOffset = 0;
2202    MaxOffset = 4095;
2203    break;
2204  case AArch64::LDPXi:
2205  case AArch64::LDPDi:
2206  case AArch64::LDNPXi:
2207  case AArch64::LDNPDi:
2208  case AArch64::STPXi:
2209  case AArch64::STPDi:
2210  case AArch64::STNPXi:
2211  case AArch64::STNPDi:
2212    Scale = TypeSize::Fixed(8);
2213    Width = 16;
2214    MinOffset = -64;
2215    MaxOffset = 63;
2216    break;
2217  case AArch64::PRFMui:
2218  case AArch64::LDRXui:
2219  case AArch64::LDRDui:
2220  case AArch64::STRXui:
2221  case AArch64::STRDui:
2222    Scale = TypeSize::Fixed(8);
2223    Width = 8;
2224    MinOffset = 0;
2225    MaxOffset = 4095;
2226    break;
2227  case AArch64::LDPWi:
2228  case AArch64::LDPSi:
2229  case AArch64::LDNPWi:
2230  case AArch64::LDNPSi:
2231  case AArch64::STPWi:
2232  case AArch64::STPSi:
2233  case AArch64::STNPWi:
2234  case AArch64::STNPSi:
2235    Scale = TypeSize::Fixed(4);
2236    Width = 8;
2237    MinOffset = -64;
2238    MaxOffset = 63;
2239    break;
2240  case AArch64::LDRWui:
2241  case AArch64::LDRSui:
2242  case AArch64::LDRSWui:
2243  case AArch64::STRWui:
2244  case AArch64::STRSui:
2245    Scale = TypeSize::Fixed(4);
2246    Width = 4;
2247    MinOffset = 0;
2248    MaxOffset = 4095;
2249    break;
2250  case AArch64::LDRHui:
2251  case AArch64::LDRHHui:
2252  case AArch64::LDRSHWui:
2253  case AArch64::LDRSHXui:
2254  case AArch64::STRHui:
2255  case AArch64::STRHHui:
2256    Scale = TypeSize::Fixed(2);
2257    Width = 2;
2258    MinOffset = 0;
2259    MaxOffset = 4095;
2260    break;
2261  case AArch64::LDRBui:
2262  case AArch64::LDRBBui:
2263  case AArch64::LDRSBWui:
2264  case AArch64::LDRSBXui:
2265  case AArch64::STRBui:
2266  case AArch64::STRBBui:
2267    Scale = TypeSize::Fixed(1);
2268    Width = 1;
2269    MinOffset = 0;
2270    MaxOffset = 4095;
2271    break;
2272  case AArch64::ADDG:
2273    Scale = TypeSize::Fixed(16);
2274    Width = 0;
2275    MinOffset = 0;
2276    MaxOffset = 63;
2277    break;
2278  case AArch64::TAGPstack:
2279    Scale = TypeSize::Fixed(16);
2280    Width = 0;
2281    // TAGP with a negative offset turns into SUBP, which has a maximum offset
2282    // of 63 (not 64!).
2283    MinOffset = -63;
2284    MaxOffset = 63;
2285    break;
2286  case AArch64::LDG:
2287  case AArch64::STGOffset:
2288  case AArch64::STZGOffset:
2289    Scale = TypeSize::Fixed(16);
2290    Width = 16;
2291    MinOffset = -256;
2292    MaxOffset = 255;
2293    break;
2294  case AArch64::STR_ZZZZXI:
2295  case AArch64::LDR_ZZZZXI:
2296    Scale = TypeSize::Scalable(16);
2297    Width = SVEMaxBytesPerVector * 4;
2298    MinOffset = -256;
2299    MaxOffset = 252;
2300    break;
2301  case AArch64::STR_ZZZXI:
2302  case AArch64::LDR_ZZZXI:
2303    Scale = TypeSize::Scalable(16);
2304    Width = SVEMaxBytesPerVector * 3;
2305    MinOffset = -256;
2306    MaxOffset = 253;
2307    break;
2308  case AArch64::STR_ZZXI:
2309  case AArch64::LDR_ZZXI:
2310    Scale = TypeSize::Scalable(16);
2311    Width = SVEMaxBytesPerVector * 2;
2312    MinOffset = -256;
2313    MaxOffset = 254;
2314    break;
2315  case AArch64::LDR_PXI:
2316  case AArch64::STR_PXI:
2317    Scale = TypeSize::Scalable(2);
2318    Width = SVEMaxBytesPerVector / 8;
2319    MinOffset = -256;
2320    MaxOffset = 255;
2321    break;
2322  case AArch64::LDR_ZXI:
2323  case AArch64::STR_ZXI:
2324    Scale = TypeSize::Scalable(16);
2325    Width = SVEMaxBytesPerVector;
2326    MinOffset = -256;
2327    MaxOffset = 255;
2328    break;
2329  case AArch64::LD1B_IMM:
2330  case AArch64::LD1H_IMM:
2331  case AArch64::LD1W_IMM:
2332  case AArch64::LD1D_IMM:
2333  case AArch64::ST1B_IMM:
2334  case AArch64::ST1H_IMM:
2335  case AArch64::ST1W_IMM:
2336  case AArch64::ST1D_IMM:
2337    // A full vectors worth of data
2338    // Width = mbytes * elements
2339    Scale = TypeSize::Scalable(16);
2340    Width = SVEMaxBytesPerVector;
2341    MinOffset = -8;
2342    MaxOffset = 7;
2343    break;
2344  case AArch64::LD1B_H_IMM:
2345  case AArch64::LD1SB_H_IMM:
2346  case AArch64::LD1H_S_IMM:
2347  case AArch64::LD1SH_S_IMM:
2348  case AArch64::LD1W_D_IMM:
2349  case AArch64::LD1SW_D_IMM:
2350  case AArch64::ST1B_H_IMM:
2351  case AArch64::ST1H_S_IMM:
2352  case AArch64::ST1W_D_IMM:
2353    // A half vector worth of data
2354    // Width = mbytes * elements
2355    Scale = TypeSize::Scalable(8);
2356    Width = SVEMaxBytesPerVector / 2;
2357    MinOffset = -8;
2358    MaxOffset = 7;
2359    break;
2360  case AArch64::LD1B_S_IMM:
2361  case AArch64::LD1SB_S_IMM:
2362  case AArch64::LD1H_D_IMM:
2363  case AArch64::LD1SH_D_IMM:
2364  case AArch64::ST1B_S_IMM:
2365  case AArch64::ST1H_D_IMM:
2366    // A quarter vector worth of data
2367    // Width = mbytes * elements
2368    Scale = TypeSize::Scalable(4);
2369    Width = SVEMaxBytesPerVector / 4;
2370    MinOffset = -8;
2371    MaxOffset = 7;
2372    break;
2373  case AArch64::LD1B_D_IMM:
2374  case AArch64::LD1SB_D_IMM:
2375  case AArch64::ST1B_D_IMM:
2376    // A eighth vector worth of data
2377    // Width = mbytes * elements
2378    Scale = TypeSize::Scalable(2);
2379    Width = SVEMaxBytesPerVector / 8;
2380    MinOffset = -8;
2381    MaxOffset = 7;
2382    break;
2383  case AArch64::ST2GOffset:
2384  case AArch64::STZ2GOffset:
2385    Scale = TypeSize::Fixed(16);
2386    Width = 32;
2387    MinOffset = -256;
2388    MaxOffset = 255;
2389    break;
2390  case AArch64::STGPi:
2391    Scale = TypeSize::Fixed(16);
2392    Width = 16;
2393    MinOffset = -64;
2394    MaxOffset = 63;
2395    break;
2396  }
2397
2398  return true;
2399}
2400
2401// Scaling factor for unscaled load or store.
2402int AArch64InstrInfo::getMemScale(unsigned Opc) {
2403  switch (Opc) {
2404  default:
2405    llvm_unreachable("Opcode has unknown scale!");
2406  case AArch64::LDRBBui:
2407  case AArch64::LDURBBi:
2408  case AArch64::LDRSBWui:
2409  case AArch64::LDURSBWi:
2410  case AArch64::STRBBui:
2411  case AArch64::STURBBi:
2412    return 1;
2413  case AArch64::LDRHHui:
2414  case AArch64::LDURHHi:
2415  case AArch64::LDRSHWui:
2416  case AArch64::LDURSHWi:
2417  case AArch64::STRHHui:
2418  case AArch64::STURHHi:
2419    return 2;
2420  case AArch64::LDRSui:
2421  case AArch64::LDURSi:
2422  case AArch64::LDRSWui:
2423  case AArch64::LDURSWi:
2424  case AArch64::LDRWui:
2425  case AArch64::LDURWi:
2426  case AArch64::STRSui:
2427  case AArch64::STURSi:
2428  case AArch64::STRWui:
2429  case AArch64::STURWi:
2430  case AArch64::LDPSi:
2431  case AArch64::LDPSWi:
2432  case AArch64::LDPWi:
2433  case AArch64::STPSi:
2434  case AArch64::STPWi:
2435    return 4;
2436  case AArch64::LDRDui:
2437  case AArch64::LDURDi:
2438  case AArch64::LDRXui:
2439  case AArch64::LDURXi:
2440  case AArch64::STRDui:
2441  case AArch64::STURDi:
2442  case AArch64::STRXui:
2443  case AArch64::STURXi:
2444  case AArch64::LDPDi:
2445  case AArch64::LDPXi:
2446  case AArch64::STPDi:
2447  case AArch64::STPXi:
2448    return 8;
2449  case AArch64::LDRQui:
2450  case AArch64::LDURQi:
2451  case AArch64::STRQui:
2452  case AArch64::STURQi:
2453  case AArch64::LDPQi:
2454  case AArch64::STPQi:
2455  case AArch64::STGOffset:
2456  case AArch64::STZGOffset:
2457  case AArch64::ST2GOffset:
2458  case AArch64::STZ2GOffset:
2459  case AArch64::STGPi:
2460    return 16;
2461  }
2462}
2463
2464// Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2465// scaled.
2466static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2467  int Scale = AArch64InstrInfo::getMemScale(Opc);
2468
2469  // If the byte-offset isn't a multiple of the stride, we can't scale this
2470  // offset.
2471  if (Offset % Scale != 0)
2472    return false;
2473
2474  // Convert the byte-offset used by unscaled into an "element" offset used
2475  // by the scaled pair load/store instructions.
2476  Offset /= Scale;
2477  return true;
2478}
2479
2480static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2481  if (FirstOpc == SecondOpc)
2482    return true;
2483  // We can also pair sign-ext and zero-ext instructions.
2484  switch (FirstOpc) {
2485  default:
2486    return false;
2487  case AArch64::LDRWui:
2488  case AArch64::LDURWi:
2489    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2490  case AArch64::LDRSWui:
2491  case AArch64::LDURSWi:
2492    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2493  }
2494  // These instructions can't be paired based on their opcodes.
2495  return false;
2496}
2497
2498static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2499                            int64_t Offset1, unsigned Opcode1, int FI2,
2500                            int64_t Offset2, unsigned Opcode2) {
2501  // Accesses through fixed stack object frame indices may access a different
2502  // fixed stack slot. Check that the object offsets + offsets match.
2503  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2504    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2505    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2506    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2507    // Convert to scaled object offsets.
2508    int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
2509    if (ObjectOffset1 % Scale1 != 0)
2510      return false;
2511    ObjectOffset1 /= Scale1;
2512    int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
2513    if (ObjectOffset2 % Scale2 != 0)
2514      return false;
2515    ObjectOffset2 /= Scale2;
2516    ObjectOffset1 += Offset1;
2517    ObjectOffset2 += Offset2;
2518    return ObjectOffset1 + 1 == ObjectOffset2;
2519  }
2520
2521  return FI1 == FI2;
2522}
2523
2524/// Detect opportunities for ldp/stp formation.
2525///
2526/// Only called for LdSt for which getMemOperandWithOffset returns true.
2527bool AArch64InstrInfo::shouldClusterMemOps(
2528    ArrayRef<const MachineOperand *> BaseOps1,
2529    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
2530    unsigned NumBytes) const {
2531  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2532  const MachineOperand &BaseOp1 = *BaseOps1.front();
2533  const MachineOperand &BaseOp2 = *BaseOps2.front();
2534  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2535  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2536  if (BaseOp1.getType() != BaseOp2.getType())
2537    return false;
2538
2539  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2540         "Only base registers and frame indices are supported.");
2541
2542  // Check for both base regs and base FI.
2543  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2544    return false;
2545
2546  // Only cluster up to a single pair.
2547  if (NumLoads > 2)
2548    return false;
2549
2550  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2551    return false;
2552
2553  // Can we pair these instructions based on their opcodes?
2554  unsigned FirstOpc = FirstLdSt.getOpcode();
2555  unsigned SecondOpc = SecondLdSt.getOpcode();
2556  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2557    return false;
2558
2559  // Can't merge volatiles or load/stores that have a hint to avoid pair
2560  // formation, for example.
2561  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2562      !isCandidateToMergeOrPair(SecondLdSt))
2563    return false;
2564
2565  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2566  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2567  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2568    return false;
2569
2570  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2571  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2572    return false;
2573
2574  // Pairwise instructions have a 7-bit signed offset field.
2575  if (Offset1 > 63 || Offset1 < -64)
2576    return false;
2577
2578  // The caller should already have ordered First/SecondLdSt by offset.
2579  // Note: except for non-equal frame index bases
2580  if (BaseOp1.isFI()) {
2581    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2582           "Caller should have ordered offsets.");
2583
2584    const MachineFrameInfo &MFI =
2585        FirstLdSt.getParent()->getParent()->getFrameInfo();
2586    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2587                           BaseOp2.getIndex(), Offset2, SecondOpc);
2588  }
2589
2590  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2591
2592  return Offset1 + 1 == Offset2;
2593}
2594
2595static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2596                                            unsigned Reg, unsigned SubIdx,
2597                                            unsigned State,
2598                                            const TargetRegisterInfo *TRI) {
2599  if (!SubIdx)
2600    return MIB.addReg(Reg, State);
2601
2602  if (Register::isPhysicalRegister(Reg))
2603    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2604  return MIB.addReg(Reg, State, SubIdx);
2605}
2606
2607static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2608                                        unsigned NumRegs) {
2609  // We really want the positive remainder mod 32 here, that happens to be
2610  // easily obtainable with a mask.
2611  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2612}
2613
2614void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2615                                        MachineBasicBlock::iterator I,
2616                                        const DebugLoc &DL, MCRegister DestReg,
2617                                        MCRegister SrcReg, bool KillSrc,
2618                                        unsigned Opcode,
2619                                        ArrayRef<unsigned> Indices) const {
2620  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2621  const TargetRegisterInfo *TRI = &getRegisterInfo();
2622  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2623  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2624  unsigned NumRegs = Indices.size();
2625
2626  int SubReg = 0, End = NumRegs, Incr = 1;
2627  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2628    SubReg = NumRegs - 1;
2629    End = -1;
2630    Incr = -1;
2631  }
2632
2633  for (; SubReg != End; SubReg += Incr) {
2634    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2635    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2636    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2637    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2638  }
2639}
2640
2641void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2642                                       MachineBasicBlock::iterator I,
2643                                       DebugLoc DL, unsigned DestReg,
2644                                       unsigned SrcReg, bool KillSrc,
2645                                       unsigned Opcode, unsigned ZeroReg,
2646                                       llvm::ArrayRef<unsigned> Indices) const {
2647  const TargetRegisterInfo *TRI = &getRegisterInfo();
2648  unsigned NumRegs = Indices.size();
2649
2650#ifndef NDEBUG
2651  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2652  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2653  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2654         "GPR reg sequences should not be able to overlap");
2655#endif
2656
2657  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2658    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2659    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2660    MIB.addReg(ZeroReg);
2661    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2662    MIB.addImm(0);
2663  }
2664}
2665
2666void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2667                                   MachineBasicBlock::iterator I,
2668                                   const DebugLoc &DL, MCRegister DestReg,
2669                                   MCRegister SrcReg, bool KillSrc) const {
2670  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2671      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2672    const TargetRegisterInfo *TRI = &getRegisterInfo();
2673
2674    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2675      // If either operand is WSP, expand to ADD #0.
2676      if (Subtarget.hasZeroCycleRegMove()) {
2677        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2678        MCRegister DestRegX = TRI->getMatchingSuperReg(
2679            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2680        MCRegister SrcRegX = TRI->getMatchingSuperReg(
2681            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2682        // This instruction is reading and writing X registers.  This may upset
2683        // the register scavenger and machine verifier, so we need to indicate
2684        // that we are reading an undefined value from SrcRegX, but a proper
2685        // value from SrcReg.
2686        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2687            .addReg(SrcRegX, RegState::Undef)
2688            .addImm(0)
2689            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2690            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2691      } else {
2692        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2693            .addReg(SrcReg, getKillRegState(KillSrc))
2694            .addImm(0)
2695            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2696      }
2697    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2698      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2699          .addImm(0)
2700          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2701    } else {
2702      if (Subtarget.hasZeroCycleRegMove()) {
2703        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2704        MCRegister DestRegX = TRI->getMatchingSuperReg(
2705            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2706        MCRegister SrcRegX = TRI->getMatchingSuperReg(
2707            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2708        // This instruction is reading and writing X registers.  This may upset
2709        // the register scavenger and machine verifier, so we need to indicate
2710        // that we are reading an undefined value from SrcRegX, but a proper
2711        // value from SrcReg.
2712        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2713            .addReg(AArch64::XZR)
2714            .addReg(SrcRegX, RegState::Undef)
2715            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2716      } else {
2717        // Otherwise, expand to ORR WZR.
2718        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2719            .addReg(AArch64::WZR)
2720            .addReg(SrcReg, getKillRegState(KillSrc));
2721      }
2722    }
2723    return;
2724  }
2725
2726  // Copy a Predicate register by ORRing with itself.
2727  if (AArch64::PPRRegClass.contains(DestReg) &&
2728      AArch64::PPRRegClass.contains(SrcReg)) {
2729    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2730    BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
2731      .addReg(SrcReg) // Pg
2732      .addReg(SrcReg)
2733      .addReg(SrcReg, getKillRegState(KillSrc));
2734    return;
2735  }
2736
2737  // Copy a Z register by ORRing with itself.
2738  if (AArch64::ZPRRegClass.contains(DestReg) &&
2739      AArch64::ZPRRegClass.contains(SrcReg)) {
2740    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2741    BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
2742      .addReg(SrcReg)
2743      .addReg(SrcReg, getKillRegState(KillSrc));
2744    return;
2745  }
2746
2747  // Copy a Z register pair by copying the individual sub-registers.
2748  if (AArch64::ZPR2RegClass.contains(DestReg) &&
2749      AArch64::ZPR2RegClass.contains(SrcReg)) {
2750    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
2751    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
2752                     Indices);
2753    return;
2754  }
2755
2756  // Copy a Z register triple by copying the individual sub-registers.
2757  if (AArch64::ZPR3RegClass.contains(DestReg) &&
2758      AArch64::ZPR3RegClass.contains(SrcReg)) {
2759    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
2760                                       AArch64::zsub2};
2761    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
2762                     Indices);
2763    return;
2764  }
2765
2766  // Copy a Z register quad by copying the individual sub-registers.
2767  if (AArch64::ZPR4RegClass.contains(DestReg) &&
2768      AArch64::ZPR4RegClass.contains(SrcReg)) {
2769    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
2770                                       AArch64::zsub2, AArch64::zsub3};
2771    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
2772                     Indices);
2773    return;
2774  }
2775
2776  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2777      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2778    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2779      // If either operand is SP, expand to ADD #0.
2780      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2781          .addReg(SrcReg, getKillRegState(KillSrc))
2782          .addImm(0)
2783          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2784    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2785      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2786          .addImm(0)
2787          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2788    } else {
2789      // Otherwise, expand to ORR XZR.
2790      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2791          .addReg(AArch64::XZR)
2792          .addReg(SrcReg, getKillRegState(KillSrc));
2793    }
2794    return;
2795  }
2796
2797  // Copy a DDDD register quad by copying the individual sub-registers.
2798  if (AArch64::DDDDRegClass.contains(DestReg) &&
2799      AArch64::DDDDRegClass.contains(SrcReg)) {
2800    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2801                                       AArch64::dsub2, AArch64::dsub3};
2802    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2803                     Indices);
2804    return;
2805  }
2806
2807  // Copy a DDD register triple by copying the individual sub-registers.
2808  if (AArch64::DDDRegClass.contains(DestReg) &&
2809      AArch64::DDDRegClass.contains(SrcReg)) {
2810    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2811                                       AArch64::dsub2};
2812    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2813                     Indices);
2814    return;
2815  }
2816
2817  // Copy a DD register pair by copying the individual sub-registers.
2818  if (AArch64::DDRegClass.contains(DestReg) &&
2819      AArch64::DDRegClass.contains(SrcReg)) {
2820    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2821    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2822                     Indices);
2823    return;
2824  }
2825
2826  // Copy a QQQQ register quad by copying the individual sub-registers.
2827  if (AArch64::QQQQRegClass.contains(DestReg) &&
2828      AArch64::QQQQRegClass.contains(SrcReg)) {
2829    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2830                                       AArch64::qsub2, AArch64::qsub3};
2831    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2832                     Indices);
2833    return;
2834  }
2835
2836  // Copy a QQQ register triple by copying the individual sub-registers.
2837  if (AArch64::QQQRegClass.contains(DestReg) &&
2838      AArch64::QQQRegClass.contains(SrcReg)) {
2839    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2840                                       AArch64::qsub2};
2841    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2842                     Indices);
2843    return;
2844  }
2845
2846  // Copy a QQ register pair by copying the individual sub-registers.
2847  if (AArch64::QQRegClass.contains(DestReg) &&
2848      AArch64::QQRegClass.contains(SrcReg)) {
2849    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2850    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2851                     Indices);
2852    return;
2853  }
2854
2855  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2856      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2857    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2858    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2859                    AArch64::XZR, Indices);
2860    return;
2861  }
2862
2863  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2864      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2865    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2866    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2867                    AArch64::WZR, Indices);
2868    return;
2869  }
2870
2871  if (AArch64::FPR128RegClass.contains(DestReg) &&
2872      AArch64::FPR128RegClass.contains(SrcReg)) {
2873    if (Subtarget.hasNEON()) {
2874      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2875          .addReg(SrcReg)
2876          .addReg(SrcReg, getKillRegState(KillSrc));
2877    } else {
2878      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2879          .addReg(AArch64::SP, RegState::Define)
2880          .addReg(SrcReg, getKillRegState(KillSrc))
2881          .addReg(AArch64::SP)
2882          .addImm(-16);
2883      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2884          .addReg(AArch64::SP, RegState::Define)
2885          .addReg(DestReg, RegState::Define)
2886          .addReg(AArch64::SP)
2887          .addImm(16);
2888    }
2889    return;
2890  }
2891
2892  if (AArch64::FPR64RegClass.contains(DestReg) &&
2893      AArch64::FPR64RegClass.contains(SrcReg)) {
2894    if (Subtarget.hasNEON()) {
2895      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2896                                       &AArch64::FPR128RegClass);
2897      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2898                                      &AArch64::FPR128RegClass);
2899      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2900          .addReg(SrcReg)
2901          .addReg(SrcReg, getKillRegState(KillSrc));
2902    } else {
2903      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2904          .addReg(SrcReg, getKillRegState(KillSrc));
2905    }
2906    return;
2907  }
2908
2909  if (AArch64::FPR32RegClass.contains(DestReg) &&
2910      AArch64::FPR32RegClass.contains(SrcReg)) {
2911    if (Subtarget.hasNEON()) {
2912      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2913                                       &AArch64::FPR128RegClass);
2914      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2915                                      &AArch64::FPR128RegClass);
2916      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2917          .addReg(SrcReg)
2918          .addReg(SrcReg, getKillRegState(KillSrc));
2919    } else {
2920      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2921          .addReg(SrcReg, getKillRegState(KillSrc));
2922    }
2923    return;
2924  }
2925
2926  if (AArch64::FPR16RegClass.contains(DestReg) &&
2927      AArch64::FPR16RegClass.contains(SrcReg)) {
2928    if (Subtarget.hasNEON()) {
2929      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2930                                       &AArch64::FPR128RegClass);
2931      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2932                                      &AArch64::FPR128RegClass);
2933      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2934          .addReg(SrcReg)
2935          .addReg(SrcReg, getKillRegState(KillSrc));
2936    } else {
2937      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2938                                       &AArch64::FPR32RegClass);
2939      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2940                                      &AArch64::FPR32RegClass);
2941      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2942          .addReg(SrcReg, getKillRegState(KillSrc));
2943    }
2944    return;
2945  }
2946
2947  if (AArch64::FPR8RegClass.contains(DestReg) &&
2948      AArch64::FPR8RegClass.contains(SrcReg)) {
2949    if (Subtarget.hasNEON()) {
2950      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2951                                       &AArch64::FPR128RegClass);
2952      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2953                                      &AArch64::FPR128RegClass);
2954      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2955          .addReg(SrcReg)
2956          .addReg(SrcReg, getKillRegState(KillSrc));
2957    } else {
2958      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2959                                       &AArch64::FPR32RegClass);
2960      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2961                                      &AArch64::FPR32RegClass);
2962      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2963          .addReg(SrcReg, getKillRegState(KillSrc));
2964    }
2965    return;
2966  }
2967
2968  // Copies between GPR64 and FPR64.
2969  if (AArch64::FPR64RegClass.contains(DestReg) &&
2970      AArch64::GPR64RegClass.contains(SrcReg)) {
2971    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2972        .addReg(SrcReg, getKillRegState(KillSrc));
2973    return;
2974  }
2975  if (AArch64::GPR64RegClass.contains(DestReg) &&
2976      AArch64::FPR64RegClass.contains(SrcReg)) {
2977    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2978        .addReg(SrcReg, getKillRegState(KillSrc));
2979    return;
2980  }
2981  // Copies between GPR32 and FPR32.
2982  if (AArch64::FPR32RegClass.contains(DestReg) &&
2983      AArch64::GPR32RegClass.contains(SrcReg)) {
2984    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2985        .addReg(SrcReg, getKillRegState(KillSrc));
2986    return;
2987  }
2988  if (AArch64::GPR32RegClass.contains(DestReg) &&
2989      AArch64::FPR32RegClass.contains(SrcReg)) {
2990    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2991        .addReg(SrcReg, getKillRegState(KillSrc));
2992    return;
2993  }
2994
2995  if (DestReg == AArch64::NZCV) {
2996    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2997    BuildMI(MBB, I, DL, get(AArch64::MSR))
2998        .addImm(AArch64SysReg::NZCV)
2999        .addReg(SrcReg, getKillRegState(KillSrc))
3000        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
3001    return;
3002  }
3003
3004  if (SrcReg == AArch64::NZCV) {
3005    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
3006    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
3007        .addImm(AArch64SysReg::NZCV)
3008        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
3009    return;
3010  }
3011
3012  llvm_unreachable("unimplemented reg-to-reg copy");
3013}
3014
3015static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
3016                                    MachineBasicBlock &MBB,
3017                                    MachineBasicBlock::iterator InsertBefore,
3018                                    const MCInstrDesc &MCID,
3019                                    Register SrcReg, bool IsKill,
3020                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
3021                                    MachineMemOperand *MMO) {
3022  Register SrcReg0 = SrcReg;
3023  Register SrcReg1 = SrcReg;
3024  if (Register::isPhysicalRegister(SrcReg)) {
3025    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
3026    SubIdx0 = 0;
3027    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
3028    SubIdx1 = 0;
3029  }
3030  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3031      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
3032      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
3033      .addFrameIndex(FI)
3034      .addImm(0)
3035      .addMemOperand(MMO);
3036}
3037
3038void AArch64InstrInfo::storeRegToStackSlot(
3039    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
3040    bool isKill, int FI, const TargetRegisterClass *RC,
3041    const TargetRegisterInfo *TRI) const {
3042  MachineFunction &MF = *MBB.getParent();
3043  MachineFrameInfo &MFI = MF.getFrameInfo();
3044
3045  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3046  MachineMemOperand *MMO =
3047      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
3048                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3049  unsigned Opc = 0;
3050  bool Offset = true;
3051  unsigned StackID = TargetStackID::Default;
3052  switch (TRI->getSpillSize(*RC)) {
3053  case 1:
3054    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3055      Opc = AArch64::STRBui;
3056    break;
3057  case 2:
3058    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3059      Opc = AArch64::STRHui;
3060    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3061      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3062      Opc = AArch64::STR_PXI;
3063      StackID = TargetStackID::SVEVector;
3064    }
3065    break;
3066  case 4:
3067    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3068      Opc = AArch64::STRWui;
3069      if (Register::isVirtualRegister(SrcReg))
3070        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
3071      else
3072        assert(SrcReg != AArch64::WSP);
3073    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3074      Opc = AArch64::STRSui;
3075    break;
3076  case 8:
3077    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3078      Opc = AArch64::STRXui;
3079      if (Register::isVirtualRegister(SrcReg))
3080        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3081      else
3082        assert(SrcReg != AArch64::SP);
3083    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3084      Opc = AArch64::STRDui;
3085    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3086      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3087                              get(AArch64::STPWi), SrcReg, isKill,
3088                              AArch64::sube32, AArch64::subo32, FI, MMO);
3089      return;
3090    }
3091    break;
3092  case 16:
3093    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3094      Opc = AArch64::STRQui;
3095    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3096      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3097      Opc = AArch64::ST1Twov1d;
3098      Offset = false;
3099    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3100      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3101                              get(AArch64::STPXi), SrcReg, isKill,
3102                              AArch64::sube64, AArch64::subo64, FI, MMO);
3103      return;
3104    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3105      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3106      Opc = AArch64::STR_ZXI;
3107      StackID = TargetStackID::SVEVector;
3108    }
3109    break;
3110  case 24:
3111    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3112      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3113      Opc = AArch64::ST1Threev1d;
3114      Offset = false;
3115    }
3116    break;
3117  case 32:
3118    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3119      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3120      Opc = AArch64::ST1Fourv1d;
3121      Offset = false;
3122    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3123      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3124      Opc = AArch64::ST1Twov2d;
3125      Offset = false;
3126    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3127      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3128      Opc = AArch64::STR_ZZXI;
3129      StackID = TargetStackID::SVEVector;
3130    }
3131    break;
3132  case 48:
3133    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3134      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3135      Opc = AArch64::ST1Threev2d;
3136      Offset = false;
3137    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3138      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3139      Opc = AArch64::STR_ZZZXI;
3140      StackID = TargetStackID::SVEVector;
3141    }
3142    break;
3143  case 64:
3144    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3145      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3146      Opc = AArch64::ST1Fourv2d;
3147      Offset = false;
3148    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3149      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3150      Opc = AArch64::STR_ZZZZXI;
3151      StackID = TargetStackID::SVEVector;
3152    }
3153    break;
3154  }
3155  assert(Opc && "Unknown register class");
3156  MFI.setStackID(FI, StackID);
3157
3158  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3159                                     .addReg(SrcReg, getKillRegState(isKill))
3160                                     .addFrameIndex(FI);
3161
3162  if (Offset)
3163    MI.addImm(0);
3164  MI.addMemOperand(MMO);
3165}
3166
3167static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
3168                                     MachineBasicBlock &MBB,
3169                                     MachineBasicBlock::iterator InsertBefore,
3170                                     const MCInstrDesc &MCID,
3171                                     Register DestReg, unsigned SubIdx0,
3172                                     unsigned SubIdx1, int FI,
3173                                     MachineMemOperand *MMO) {
3174  Register DestReg0 = DestReg;
3175  Register DestReg1 = DestReg;
3176  bool IsUndef = true;
3177  if (Register::isPhysicalRegister(DestReg)) {
3178    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
3179    SubIdx0 = 0;
3180    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
3181    SubIdx1 = 0;
3182    IsUndef = false;
3183  }
3184  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3185      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
3186      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
3187      .addFrameIndex(FI)
3188      .addImm(0)
3189      .addMemOperand(MMO);
3190}
3191
3192void AArch64InstrInfo::loadRegFromStackSlot(
3193    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
3194    int FI, const TargetRegisterClass *RC,
3195    const TargetRegisterInfo *TRI) const {
3196  MachineFunction &MF = *MBB.getParent();
3197  MachineFrameInfo &MFI = MF.getFrameInfo();
3198  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3199  MachineMemOperand *MMO =
3200      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
3201                              MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3202
3203  unsigned Opc = 0;
3204  bool Offset = true;
3205  unsigned StackID = TargetStackID::Default;
3206  switch (TRI->getSpillSize(*RC)) {
3207  case 1:
3208    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3209      Opc = AArch64::LDRBui;
3210    break;
3211  case 2:
3212    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3213      Opc = AArch64::LDRHui;
3214    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3215      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3216      Opc = AArch64::LDR_PXI;
3217      StackID = TargetStackID::SVEVector;
3218    }
3219    break;
3220  case 4:
3221    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3222      Opc = AArch64::LDRWui;
3223      if (Register::isVirtualRegister(DestReg))
3224        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3225      else
3226        assert(DestReg != AArch64::WSP);
3227    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3228      Opc = AArch64::LDRSui;
3229    break;
3230  case 8:
3231    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3232      Opc = AArch64::LDRXui;
3233      if (Register::isVirtualRegister(DestReg))
3234        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3235      else
3236        assert(DestReg != AArch64::SP);
3237    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3238      Opc = AArch64::LDRDui;
3239    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3240      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3241                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
3242                               AArch64::subo32, FI, MMO);
3243      return;
3244    }
3245    break;
3246  case 16:
3247    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3248      Opc = AArch64::LDRQui;
3249    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3250      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3251      Opc = AArch64::LD1Twov1d;
3252      Offset = false;
3253    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3254      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3255                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
3256                               AArch64::subo64, FI, MMO);
3257      return;
3258    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3259      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3260      Opc = AArch64::LDR_ZXI;
3261      StackID = TargetStackID::SVEVector;
3262    }
3263    break;
3264  case 24:
3265    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3266      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3267      Opc = AArch64::LD1Threev1d;
3268      Offset = false;
3269    }
3270    break;
3271  case 32:
3272    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3273      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3274      Opc = AArch64::LD1Fourv1d;
3275      Offset = false;
3276    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3277      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3278      Opc = AArch64::LD1Twov2d;
3279      Offset = false;
3280    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3281      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3282      Opc = AArch64::LDR_ZZXI;
3283      StackID = TargetStackID::SVEVector;
3284    }
3285    break;
3286  case 48:
3287    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3288      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3289      Opc = AArch64::LD1Threev2d;
3290      Offset = false;
3291    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3292      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3293      Opc = AArch64::LDR_ZZZXI;
3294      StackID = TargetStackID::SVEVector;
3295    }
3296    break;
3297  case 64:
3298    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3299      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3300      Opc = AArch64::LD1Fourv2d;
3301      Offset = false;
3302    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3303      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3304      Opc = AArch64::LDR_ZZZZXI;
3305      StackID = TargetStackID::SVEVector;
3306    }
3307    break;
3308  }
3309
3310  assert(Opc && "Unknown register class");
3311  MFI.setStackID(FI, StackID);
3312
3313  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3314                                     .addReg(DestReg, getDefRegState(true))
3315                                     .addFrameIndex(FI);
3316  if (Offset)
3317    MI.addImm(0);
3318  MI.addMemOperand(MMO);
3319}
3320
3321bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
3322                                           const MachineInstr &UseMI,
3323                                           const TargetRegisterInfo *TRI) {
3324  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
3325                                         UseMI.getIterator()),
3326                [TRI](const MachineInstr &I) {
3327                  return I.modifiesRegister(AArch64::NZCV, TRI) ||
3328                         I.readsRegister(AArch64::NZCV, TRI);
3329                });
3330}
3331
3332// Helper function to emit a frame offset adjustment from a given
3333// pointer (SrcReg), stored into DestReg. This function is explicit
3334// in that it requires the opcode.
3335static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3336                               MachineBasicBlock::iterator MBBI,
3337                               const DebugLoc &DL, unsigned DestReg,
3338                               unsigned SrcReg, int64_t Offset, unsigned Opc,
3339                               const TargetInstrInfo *TII,
3340                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3341                               bool *HasWinCFI) {
3342  int Sign = 1;
3343  unsigned MaxEncoding, ShiftSize;
3344  switch (Opc) {
3345  case AArch64::ADDXri:
3346  case AArch64::ADDSXri:
3347  case AArch64::SUBXri:
3348  case AArch64::SUBSXri:
3349    MaxEncoding = 0xfff;
3350    ShiftSize = 12;
3351    break;
3352  case AArch64::ADDVL_XXI:
3353  case AArch64::ADDPL_XXI:
3354    MaxEncoding = 31;
3355    ShiftSize = 0;
3356    if (Offset < 0) {
3357      MaxEncoding = 32;
3358      Sign = -1;
3359      Offset = -Offset;
3360    }
3361    break;
3362  default:
3363    llvm_unreachable("Unsupported opcode");
3364  }
3365
3366  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3367  // scratch register.  If DestReg is a virtual register, use it as the
3368  // scratch register; otherwise, create a new virtual register (to be
3369  // replaced by the scavenger at the end of PEI).  That case can be optimized
3370  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3371  // register can be loaded with offset%8 and the add/sub can use an extending
3372  // instruction with LSL#3.
3373  // Currently the function handles any offsets but generates a poor sequence
3374  // of code.
3375  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3376
3377  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3378  Register TmpReg = DestReg;
3379  if (TmpReg == AArch64::XZR)
3380    TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
3381        &AArch64::GPR64RegClass);
3382  do {
3383    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
3384    unsigned LocalShiftSize = 0;
3385    if (ThisVal > MaxEncoding) {
3386      ThisVal = ThisVal >> ShiftSize;
3387      LocalShiftSize = ShiftSize;
3388    }
3389    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3390           "Encoding cannot handle value that big");
3391
3392    Offset -= ThisVal << LocalShiftSize;
3393    if (Offset == 0)
3394      TmpReg = DestReg;
3395    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
3396                   .addReg(SrcReg)
3397                   .addImm(Sign * (int)ThisVal);
3398    if (ShiftSize)
3399      MBI = MBI.addImm(
3400          AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
3401    MBI = MBI.setMIFlag(Flag);
3402
3403    if (NeedsWinCFI) {
3404      assert(Sign == 1 && "SEH directives should always have a positive sign");
3405      int Imm = (int)(ThisVal << LocalShiftSize);
3406      if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3407          (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3408        if (HasWinCFI)
3409          *HasWinCFI = true;
3410        if (Imm == 0)
3411          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
3412        else
3413          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
3414              .addImm(Imm)
3415              .setMIFlag(Flag);
3416        assert(Offset == 0 && "Expected remaining offset to be zero to "
3417                              "emit a single SEH directive");
3418      } else if (DestReg == AArch64::SP) {
3419        if (HasWinCFI)
3420          *HasWinCFI = true;
3421        assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
3422        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3423            .addImm(Imm)
3424            .setMIFlag(Flag);
3425      }
3426      if (HasWinCFI)
3427        *HasWinCFI = true;
3428    }
3429
3430    SrcReg = TmpReg;
3431  } while (Offset);
3432}
3433
3434void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3435                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3436                           unsigned DestReg, unsigned SrcReg,
3437                           StackOffset Offset, const TargetInstrInfo *TII,
3438                           MachineInstr::MIFlag Flag, bool SetNZCV,
3439                           bool NeedsWinCFI, bool *HasWinCFI) {
3440  int64_t Bytes, NumPredicateVectors, NumDataVectors;
3441  Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
3442
3443  // First emit non-scalable frame offsets, or a simple 'mov'.
3444  if (Bytes || (!Offset && SrcReg != DestReg)) {
3445    assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
3446           "SP increment/decrement not 16-byte aligned");
3447    unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
3448    if (Bytes < 0) {
3449      Bytes = -Bytes;
3450      Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
3451    }
3452    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
3453                       NeedsWinCFI, HasWinCFI);
3454    SrcReg = DestReg;
3455  }
3456
3457  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
3458         "SetNZCV not supported with SVE vectors");
3459  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
3460         "WinCFI not supported with SVE vectors");
3461
3462  if (NumDataVectors) {
3463    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
3464                       AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3465    SrcReg = DestReg;
3466  }
3467
3468  if (NumPredicateVectors) {
3469    assert(DestReg != AArch64::SP && "Unaligned access to SP");
3470    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
3471                       AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3472  }
3473}
3474
3475MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3476    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3477    MachineBasicBlock::iterator InsertPt, int FrameIndex,
3478    LiveIntervals *LIS, VirtRegMap *VRM) const {
3479  // This is a bit of a hack. Consider this instruction:
3480  //
3481  //   %0 = COPY %sp; GPR64all:%0
3482  //
3483  // We explicitly chose GPR64all for the virtual register so such a copy might
3484  // be eliminated by RegisterCoalescer. However, that may not be possible, and
3485  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3486  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3487  //
3488  // To prevent that, we are going to constrain the %0 register class here.
3489  //
3490  // <rdar://problem/11522048>
3491  //
3492  if (MI.isFullCopy()) {
3493    Register DstReg = MI.getOperand(0).getReg();
3494    Register SrcReg = MI.getOperand(1).getReg();
3495    if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
3496      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3497      return nullptr;
3498    }
3499    if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
3500      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3501      return nullptr;
3502    }
3503  }
3504
3505  // Handle the case where a copy is being spilled or filled but the source
3506  // and destination register class don't match.  For example:
3507  //
3508  //   %0 = COPY %xzr; GPR64common:%0
3509  //
3510  // In this case we can still safely fold away the COPY and generate the
3511  // following spill code:
3512  //
3513  //   STRXui %xzr, %stack.0
3514  //
3515  // This also eliminates spilled cross register class COPYs (e.g. between x and
3516  // d regs) of the same size.  For example:
3517  //
3518  //   %0 = COPY %1; GPR64:%0, FPR64:%1
3519  //
3520  // will be filled as
3521  //
3522  //   LDRDui %0, fi<#0>
3523  //
3524  // instead of
3525  //
3526  //   LDRXui %Temp, fi<#0>
3527  //   %0 = FMOV %Temp
3528  //
3529  if (MI.isCopy() && Ops.size() == 1 &&
3530      // Make sure we're only folding the explicit COPY defs/uses.
3531      (Ops[0] == 0 || Ops[0] == 1)) {
3532    bool IsSpill = Ops[0] == 0;
3533    bool IsFill = !IsSpill;
3534    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3535    const MachineRegisterInfo &MRI = MF.getRegInfo();
3536    MachineBasicBlock &MBB = *MI.getParent();
3537    const MachineOperand &DstMO = MI.getOperand(0);
3538    const MachineOperand &SrcMO = MI.getOperand(1);
3539    Register DstReg = DstMO.getReg();
3540    Register SrcReg = SrcMO.getReg();
3541    // This is slightly expensive to compute for physical regs since
3542    // getMinimalPhysRegClass is slow.
3543    auto getRegClass = [&](unsigned Reg) {
3544      return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
3545                                              : TRI.getMinimalPhysRegClass(Reg);
3546    };
3547
3548    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3549      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3550                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3551             "Mismatched register size in non subreg COPY");
3552      if (IsSpill)
3553        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3554                            getRegClass(SrcReg), &TRI);
3555      else
3556        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3557                             getRegClass(DstReg), &TRI);
3558      return &*--InsertPt;
3559    }
3560
3561    // Handle cases like spilling def of:
3562    //
3563    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3564    //
3565    // where the physical register source can be widened and stored to the full
3566    // virtual reg destination stack slot, in this case producing:
3567    //
3568    //   STRXui %xzr, %stack.0
3569    //
3570    if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
3571      assert(SrcMO.getSubReg() == 0 &&
3572             "Unexpected subreg on physical register");
3573      const TargetRegisterClass *SpillRC;
3574      unsigned SpillSubreg;
3575      switch (DstMO.getSubReg()) {
3576      default:
3577        SpillRC = nullptr;
3578        break;
3579      case AArch64::sub_32:
3580      case AArch64::ssub:
3581        if (AArch64::GPR32RegClass.contains(SrcReg)) {
3582          SpillRC = &AArch64::GPR64RegClass;
3583          SpillSubreg = AArch64::sub_32;
3584        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3585          SpillRC = &AArch64::FPR64RegClass;
3586          SpillSubreg = AArch64::ssub;
3587        } else
3588          SpillRC = nullptr;
3589        break;
3590      case AArch64::dsub:
3591        if (AArch64::FPR64RegClass.contains(SrcReg)) {
3592          SpillRC = &AArch64::FPR128RegClass;
3593          SpillSubreg = AArch64::dsub;
3594        } else
3595          SpillRC = nullptr;
3596        break;
3597      }
3598
3599      if (SpillRC)
3600        if (unsigned WidenedSrcReg =
3601                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3602          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3603                              FrameIndex, SpillRC, &TRI);
3604          return &*--InsertPt;
3605        }
3606    }
3607
3608    // Handle cases like filling use of:
3609    //
3610    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3611    //
3612    // where we can load the full virtual reg source stack slot, into the subreg
3613    // destination, in this case producing:
3614    //
3615    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
3616    //
3617    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3618      const TargetRegisterClass *FillRC;
3619      switch (DstMO.getSubReg()) {
3620      default:
3621        FillRC = nullptr;
3622        break;
3623      case AArch64::sub_32:
3624        FillRC = &AArch64::GPR32RegClass;
3625        break;
3626      case AArch64::ssub:
3627        FillRC = &AArch64::FPR32RegClass;
3628        break;
3629      case AArch64::dsub:
3630        FillRC = &AArch64::FPR64RegClass;
3631        break;
3632      }
3633
3634      if (FillRC) {
3635        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3636                   TRI.getRegSizeInBits(*FillRC) &&
3637               "Mismatched regclass size on folded subreg COPY");
3638        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3639        MachineInstr &LoadMI = *--InsertPt;
3640        MachineOperand &LoadDst = LoadMI.getOperand(0);
3641        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3642        LoadDst.setSubReg(DstMO.getSubReg());
3643        LoadDst.setIsUndef();
3644        return &LoadMI;
3645      }
3646    }
3647  }
3648
3649  // Cannot fold.
3650  return nullptr;
3651}
3652
3653int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
3654                                    StackOffset &SOffset,
3655                                    bool *OutUseUnscaledOp,
3656                                    unsigned *OutUnscaledOp,
3657                                    int64_t *EmittableOffset) {
3658  // Set output values in case of early exit.
3659  if (EmittableOffset)
3660    *EmittableOffset = 0;
3661  if (OutUseUnscaledOp)
3662    *OutUseUnscaledOp = false;
3663  if (OutUnscaledOp)
3664    *OutUnscaledOp = 0;
3665
3666  // Exit early for structured vector spills/fills as they can't take an
3667  // immediate offset.
3668  switch (MI.getOpcode()) {
3669  default:
3670    break;
3671  case AArch64::LD1Twov2d:
3672  case AArch64::LD1Threev2d:
3673  case AArch64::LD1Fourv2d:
3674  case AArch64::LD1Twov1d:
3675  case AArch64::LD1Threev1d:
3676  case AArch64::LD1Fourv1d:
3677  case AArch64::ST1Twov2d:
3678  case AArch64::ST1Threev2d:
3679  case AArch64::ST1Fourv2d:
3680  case AArch64::ST1Twov1d:
3681  case AArch64::ST1Threev1d:
3682  case AArch64::ST1Fourv1d:
3683  case AArch64::IRG:
3684  case AArch64::IRGstack:
3685  case AArch64::STGloop:
3686  case AArch64::STZGloop:
3687    return AArch64FrameOffsetCannotUpdate;
3688  }
3689
3690  // Get the min/max offset and the scale.
3691  TypeSize ScaleValue(0U, false);
3692  unsigned Width;
3693  int64_t MinOff, MaxOff;
3694  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
3695                                      MaxOff))
3696    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3697
3698  // Construct the complete offset.
3699  bool IsMulVL = ScaleValue.isScalable();
3700  unsigned Scale = ScaleValue.getKnownMinSize();
3701  int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
3702
3703  const MachineOperand &ImmOpnd =
3704      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
3705  Offset += ImmOpnd.getImm() * Scale;
3706
3707  // If the offset doesn't match the scale, we rewrite the instruction to
3708  // use the unscaled instruction instead. Likewise, if we have a negative
3709  // offset and there is an unscaled op to use.
3710  Optional<unsigned> UnscaledOp =
3711      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
3712  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3713  if (useUnscaledOp &&
3714      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
3715                                      MaxOff))
3716    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3717
3718  Scale = ScaleValue.getKnownMinSize();
3719  assert(IsMulVL == ScaleValue.isScalable() &&
3720         "Unscaled opcode has different value for scalable");
3721
3722  int64_t Remainder = Offset % Scale;
3723  assert(!(Remainder && useUnscaledOp) &&
3724         "Cannot have remainder when using unscaled op");
3725
3726  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3727  int64_t NewOffset = Offset / Scale;
3728  if (MinOff <= NewOffset && NewOffset <= MaxOff)
3729    Offset = Remainder;
3730  else {
3731    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3732    Offset = Offset - NewOffset * Scale + Remainder;
3733  }
3734
3735  if (EmittableOffset)
3736    *EmittableOffset = NewOffset;
3737  if (OutUseUnscaledOp)
3738    *OutUseUnscaledOp = useUnscaledOp;
3739  if (OutUnscaledOp && UnscaledOp)
3740    *OutUnscaledOp = *UnscaledOp;
3741
3742  if (IsMulVL)
3743    SOffset = StackOffset(Offset, MVT::nxv1i8) +
3744              StackOffset(SOffset.getBytes(), MVT::i8);
3745  else
3746    SOffset = StackOffset(Offset, MVT::i8) +
3747              StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
3748  return AArch64FrameOffsetCanUpdate |
3749         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
3750}
3751
3752bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3753                                    unsigned FrameReg, StackOffset &Offset,
3754                                    const AArch64InstrInfo *TII) {
3755  unsigned Opcode = MI.getOpcode();
3756  unsigned ImmIdx = FrameRegIdx + 1;
3757
3758  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3759    Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
3760    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3761                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3762                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3763    MI.eraseFromParent();
3764    Offset = StackOffset();
3765    return true;
3766  }
3767
3768  int64_t NewOffset;
3769  unsigned UnscaledOp;
3770  bool UseUnscaledOp;
3771  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3772                                         &UnscaledOp, &NewOffset);
3773  if (Status & AArch64FrameOffsetCanUpdate) {
3774    if (Status & AArch64FrameOffsetIsLegal)
3775      // Replace the FrameIndex with FrameReg.
3776      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3777    if (UseUnscaledOp)
3778      MI.setDesc(TII->get(UnscaledOp));
3779
3780    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3781    return !Offset;
3782  }
3783
3784  return false;
3785}
3786
3787void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3788  NopInst.setOpcode(AArch64::HINT);
3789  NopInst.addOperand(MCOperand::createImm(0));
3790}
3791
3792// AArch64 supports MachineCombiner.
3793bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3794
3795// True when Opc sets flag
3796static bool isCombineInstrSettingFlag(unsigned Opc) {
3797  switch (Opc) {
3798  case AArch64::ADDSWrr:
3799  case AArch64::ADDSWri:
3800  case AArch64::ADDSXrr:
3801  case AArch64::ADDSXri:
3802  case AArch64::SUBSWrr:
3803  case AArch64::SUBSXrr:
3804  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3805  case AArch64::SUBSWri:
3806  case AArch64::SUBSXri:
3807    return true;
3808  default:
3809    break;
3810  }
3811  return false;
3812}
3813
3814// 32b Opcodes that can be combined with a MUL
3815static bool isCombineInstrCandidate32(unsigned Opc) {
3816  switch (Opc) {
3817  case AArch64::ADDWrr:
3818  case AArch64::ADDWri:
3819  case AArch64::SUBWrr:
3820  case AArch64::ADDSWrr:
3821  case AArch64::ADDSWri:
3822  case AArch64::SUBSWrr:
3823  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3824  case AArch64::SUBWri:
3825  case AArch64::SUBSWri:
3826    return true;
3827  default:
3828    break;
3829  }
3830  return false;
3831}
3832
3833// 64b Opcodes that can be combined with a MUL
3834static bool isCombineInstrCandidate64(unsigned Opc) {
3835  switch (Opc) {
3836  case AArch64::ADDXrr:
3837  case AArch64::ADDXri:
3838  case AArch64::SUBXrr:
3839  case AArch64::ADDSXrr:
3840  case AArch64::ADDSXri:
3841  case AArch64::SUBSXrr:
3842  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3843  case AArch64::SUBXri:
3844  case AArch64::SUBSXri:
3845  case AArch64::ADDv8i8:
3846  case AArch64::ADDv16i8:
3847  case AArch64::ADDv4i16:
3848  case AArch64::ADDv8i16:
3849  case AArch64::ADDv2i32:
3850  case AArch64::ADDv4i32:
3851  case AArch64::SUBv8i8:
3852  case AArch64::SUBv16i8:
3853  case AArch64::SUBv4i16:
3854  case AArch64::SUBv8i16:
3855  case AArch64::SUBv2i32:
3856  case AArch64::SUBv4i32:
3857    return true;
3858  default:
3859    break;
3860  }
3861  return false;
3862}
3863
3864// FP Opcodes that can be combined with a FMUL
3865static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3866  switch (Inst.getOpcode()) {
3867  default:
3868    break;
3869  case AArch64::FADDHrr:
3870  case AArch64::FADDSrr:
3871  case AArch64::FADDDrr:
3872  case AArch64::FADDv4f16:
3873  case AArch64::FADDv8f16:
3874  case AArch64::FADDv2f32:
3875  case AArch64::FADDv2f64:
3876  case AArch64::FADDv4f32:
3877  case AArch64::FSUBHrr:
3878  case AArch64::FSUBSrr:
3879  case AArch64::FSUBDrr:
3880  case AArch64::FSUBv4f16:
3881  case AArch64::FSUBv8f16:
3882  case AArch64::FSUBv2f32:
3883  case AArch64::FSUBv2f64:
3884  case AArch64::FSUBv4f32:
3885    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3886    return (Options.UnsafeFPMath ||
3887            Options.AllowFPOpFusion == FPOpFusion::Fast);
3888  }
3889  return false;
3890}
3891
3892// Opcodes that can be combined with a MUL
3893static bool isCombineInstrCandidate(unsigned Opc) {
3894  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3895}
3896
3897//
3898// Utility routine that checks if \param MO is defined by an
3899// \param CombineOpc instruction in the basic block \param MBB
3900static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3901                       unsigned CombineOpc, unsigned ZeroReg = 0,
3902                       bool CheckZeroReg = false) {
3903  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3904  MachineInstr *MI = nullptr;
3905
3906  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
3907    MI = MRI.getUniqueVRegDef(MO.getReg());
3908  // And it needs to be in the trace (otherwise, it won't have a depth).
3909  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3910    return false;
3911  // Must only used by the user we combine with.
3912  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3913    return false;
3914
3915  if (CheckZeroReg) {
3916    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3917           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3918           MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3919    // The third input reg must be zero.
3920    if (MI->getOperand(3).getReg() != ZeroReg)
3921      return false;
3922  }
3923
3924  return true;
3925}
3926
3927//
3928// Is \param MO defined by an integer multiply and can be combined?
3929static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3930                              unsigned MulOpc, unsigned ZeroReg) {
3931  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3932}
3933
3934//
3935// Is \param MO defined by a floating-point multiply and can be combined?
3936static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3937                               unsigned MulOpc) {
3938  return canCombine(MBB, MO, MulOpc);
3939}
3940
3941// TODO: There are many more machine instruction opcodes to match:
3942//       1. Other data types (integer, vectors)
3943//       2. Other math / logic operations (xor, or)
3944//       3. Other forms of the same operation (intrinsics and other variants)
3945bool AArch64InstrInfo::isAssociativeAndCommutative(
3946    const MachineInstr &Inst) const {
3947  switch (Inst.getOpcode()) {
3948  case AArch64::FADDDrr:
3949  case AArch64::FADDSrr:
3950  case AArch64::FADDv2f32:
3951  case AArch64::FADDv2f64:
3952  case AArch64::FADDv4f32:
3953  case AArch64::FMULDrr:
3954  case AArch64::FMULSrr:
3955  case AArch64::FMULX32:
3956  case AArch64::FMULX64:
3957  case AArch64::FMULXv2f32:
3958  case AArch64::FMULXv2f64:
3959  case AArch64::FMULXv4f32:
3960  case AArch64::FMULv2f32:
3961  case AArch64::FMULv2f64:
3962  case AArch64::FMULv4f32:
3963    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3964  default:
3965    return false;
3966  }
3967}
3968
3969/// Find instructions that can be turned into madd.
3970static bool getMaddPatterns(MachineInstr &Root,
3971                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3972  unsigned Opc = Root.getOpcode();
3973  MachineBasicBlock &MBB = *Root.getParent();
3974  bool Found = false;
3975
3976  if (!isCombineInstrCandidate(Opc))
3977    return false;
3978  if (isCombineInstrSettingFlag(Opc)) {
3979    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3980    // When NZCV is live bail out.
3981    if (Cmp_NZCV == -1)
3982      return false;
3983    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3984    // When opcode can't change bail out.
3985    // CHECKME: do we miss any cases for opcode conversion?
3986    if (NewOpc == Opc)
3987      return false;
3988    Opc = NewOpc;
3989  }
3990
3991  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
3992                      MachineCombinerPattern Pattern) {
3993    if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
3994      Patterns.push_back(Pattern);
3995      Found = true;
3996    }
3997  };
3998
3999  auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
4000    if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
4001      Patterns.push_back(Pattern);
4002      Found = true;
4003    }
4004  };
4005
4006  typedef MachineCombinerPattern MCP;
4007
4008  switch (Opc) {
4009  default:
4010    break;
4011  case AArch64::ADDWrr:
4012    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4013           "ADDWrr does not have register operands");
4014    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
4015    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
4016    break;
4017  case AArch64::ADDXrr:
4018    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
4019    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
4020    break;
4021  case AArch64::SUBWrr:
4022    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
4023    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
4024    break;
4025  case AArch64::SUBXrr:
4026    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
4027    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
4028    break;
4029  case AArch64::ADDWri:
4030    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
4031    break;
4032  case AArch64::ADDXri:
4033    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
4034    break;
4035  case AArch64::SUBWri:
4036    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
4037    break;
4038  case AArch64::SUBXri:
4039    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
4040    break;
4041  case AArch64::ADDv8i8:
4042    setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
4043    setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
4044    break;
4045  case AArch64::ADDv16i8:
4046    setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
4047    setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
4048    break;
4049  case AArch64::ADDv4i16:
4050    setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
4051    setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
4052    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
4053    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
4054    break;
4055  case AArch64::ADDv8i16:
4056    setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
4057    setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
4058    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
4059    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
4060    break;
4061  case AArch64::ADDv2i32:
4062    setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
4063    setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
4064    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
4065    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
4066    break;
4067  case AArch64::ADDv4i32:
4068    setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
4069    setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
4070    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
4071    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
4072    break;
4073  case AArch64::SUBv8i8:
4074    setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
4075    setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
4076    break;
4077  case AArch64::SUBv16i8:
4078    setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
4079    setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
4080    break;
4081  case AArch64::SUBv4i16:
4082    setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
4083    setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
4084    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
4085    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
4086    break;
4087  case AArch64::SUBv8i16:
4088    setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
4089    setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
4090    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
4091    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
4092    break;
4093  case AArch64::SUBv2i32:
4094    setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
4095    setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
4096    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
4097    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
4098    break;
4099  case AArch64::SUBv4i32:
4100    setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
4101    setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
4102    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
4103    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
4104    break;
4105  }
4106  return Found;
4107}
4108/// Floating-Point Support
4109
4110/// Find instructions that can be turned into madd.
4111static bool getFMAPatterns(MachineInstr &Root,
4112                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4113
4114  if (!isCombineInstrCandidateFP(Root))
4115    return false;
4116
4117  MachineBasicBlock &MBB = *Root.getParent();
4118  bool Found = false;
4119
4120  auto Match = [&](int Opcode, int Operand,
4121                   MachineCombinerPattern Pattern) -> bool {
4122    if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
4123      Patterns.push_back(Pattern);
4124      return true;
4125    }
4126    return false;
4127  };
4128
4129  typedef MachineCombinerPattern MCP;
4130
4131  switch (Root.getOpcode()) {
4132  default:
4133    assert(false && "Unsupported FP instruction in combiner\n");
4134    break;
4135  case AArch64::FADDHrr:
4136    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4137           "FADDHrr does not have register operands");
4138
4139    Found  = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
4140    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
4141    break;
4142  case AArch64::FADDSrr:
4143    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4144           "FADDSrr does not have register operands");
4145
4146    Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
4147             Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
4148
4149    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
4150             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
4151    break;
4152  case AArch64::FADDDrr:
4153    Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
4154             Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
4155
4156    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
4157             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
4158    break;
4159  case AArch64::FADDv4f16:
4160    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
4161             Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
4162
4163    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
4164             Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
4165    break;
4166  case AArch64::FADDv8f16:
4167    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
4168             Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
4169
4170    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
4171             Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
4172    break;
4173  case AArch64::FADDv2f32:
4174    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
4175             Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
4176
4177    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
4178             Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
4179    break;
4180  case AArch64::FADDv2f64:
4181    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
4182             Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
4183
4184    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
4185             Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
4186    break;
4187  case AArch64::FADDv4f32:
4188    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
4189             Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
4190
4191    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
4192             Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
4193    break;
4194  case AArch64::FSUBHrr:
4195    Found  = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
4196    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
4197    Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
4198    break;
4199  case AArch64::FSUBSrr:
4200    Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
4201
4202    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
4203             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
4204
4205    Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
4206    break;
4207  case AArch64::FSUBDrr:
4208    Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
4209
4210    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
4211             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
4212
4213    Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
4214    break;
4215  case AArch64::FSUBv4f16:
4216    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
4217             Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
4218
4219    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
4220             Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
4221    break;
4222  case AArch64::FSUBv8f16:
4223    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
4224             Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
4225
4226    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
4227             Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
4228    break;
4229  case AArch64::FSUBv2f32:
4230    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
4231             Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4232
4233    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4234             Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4235    break;
4236  case AArch64::FSUBv2f64:
4237    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4238             Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4239
4240    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4241             Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4242    break;
4243  case AArch64::FSUBv4f32:
4244    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4245             Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4246
4247    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4248             Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4249    break;
4250  }
4251  return Found;
4252}
4253
4254/// Return true when a code sequence can improve throughput. It
4255/// should be called only for instructions in loops.
4256/// \param Pattern - combiner pattern
4257bool AArch64InstrInfo::isThroughputPattern(
4258    MachineCombinerPattern Pattern) const {
4259  switch (Pattern) {
4260  default:
4261    break;
4262  case MachineCombinerPattern::FMULADDH_OP1:
4263  case MachineCombinerPattern::FMULADDH_OP2:
4264  case MachineCombinerPattern::FMULSUBH_OP1:
4265  case MachineCombinerPattern::FMULSUBH_OP2:
4266  case MachineCombinerPattern::FMULADDS_OP1:
4267  case MachineCombinerPattern::FMULADDS_OP2:
4268  case MachineCombinerPattern::FMULSUBS_OP1:
4269  case MachineCombinerPattern::FMULSUBS_OP2:
4270  case MachineCombinerPattern::FMULADDD_OP1:
4271  case MachineCombinerPattern::FMULADDD_OP2:
4272  case MachineCombinerPattern::FMULSUBD_OP1:
4273  case MachineCombinerPattern::FMULSUBD_OP2:
4274  case MachineCombinerPattern::FNMULSUBH_OP1:
4275  case MachineCombinerPattern::FNMULSUBS_OP1:
4276  case MachineCombinerPattern::FNMULSUBD_OP1:
4277  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4278  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4279  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4280  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4281  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4282  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4283  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4284  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4285  case MachineCombinerPattern::FMLAv4f16_OP2:
4286  case MachineCombinerPattern::FMLAv4f16_OP1:
4287  case MachineCombinerPattern::FMLAv8f16_OP1:
4288  case MachineCombinerPattern::FMLAv8f16_OP2:
4289  case MachineCombinerPattern::FMLAv2f32_OP2:
4290  case MachineCombinerPattern::FMLAv2f32_OP1:
4291  case MachineCombinerPattern::FMLAv2f64_OP1:
4292  case MachineCombinerPattern::FMLAv2f64_OP2:
4293  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4294  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4295  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4296  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4297  case MachineCombinerPattern::FMLAv4f32_OP1:
4298  case MachineCombinerPattern::FMLAv4f32_OP2:
4299  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4300  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4301  case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
4302  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
4303  case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
4304  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
4305  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4306  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4307  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4308  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4309  case MachineCombinerPattern::FMLSv4f16_OP1:
4310  case MachineCombinerPattern::FMLSv4f16_OP2:
4311  case MachineCombinerPattern::FMLSv8f16_OP1:
4312  case MachineCombinerPattern::FMLSv8f16_OP2:
4313  case MachineCombinerPattern::FMLSv2f32_OP2:
4314  case MachineCombinerPattern::FMLSv2f64_OP2:
4315  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4316  case MachineCombinerPattern::FMLSv4f32_OP2:
4317  case MachineCombinerPattern::MULADDv8i8_OP1:
4318  case MachineCombinerPattern::MULADDv8i8_OP2:
4319  case MachineCombinerPattern::MULADDv16i8_OP1:
4320  case MachineCombinerPattern::MULADDv16i8_OP2:
4321  case MachineCombinerPattern::MULADDv4i16_OP1:
4322  case MachineCombinerPattern::MULADDv4i16_OP2:
4323  case MachineCombinerPattern::MULADDv8i16_OP1:
4324  case MachineCombinerPattern::MULADDv8i16_OP2:
4325  case MachineCombinerPattern::MULADDv2i32_OP1:
4326  case MachineCombinerPattern::MULADDv2i32_OP2:
4327  case MachineCombinerPattern::MULADDv4i32_OP1:
4328  case MachineCombinerPattern::MULADDv4i32_OP2:
4329  case MachineCombinerPattern::MULSUBv8i8_OP1:
4330  case MachineCombinerPattern::MULSUBv8i8_OP2:
4331  case MachineCombinerPattern::MULSUBv16i8_OP1:
4332  case MachineCombinerPattern::MULSUBv16i8_OP2:
4333  case MachineCombinerPattern::MULSUBv4i16_OP1:
4334  case MachineCombinerPattern::MULSUBv4i16_OP2:
4335  case MachineCombinerPattern::MULSUBv8i16_OP1:
4336  case MachineCombinerPattern::MULSUBv8i16_OP2:
4337  case MachineCombinerPattern::MULSUBv2i32_OP1:
4338  case MachineCombinerPattern::MULSUBv2i32_OP2:
4339  case MachineCombinerPattern::MULSUBv4i32_OP1:
4340  case MachineCombinerPattern::MULSUBv4i32_OP2:
4341  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4342  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4343  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4344  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4345  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4346  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4347  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4348  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4349  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4350  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4351  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4352  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4353  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4354  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4355  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4356  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4357    return true;
4358  } // end switch (Pattern)
4359  return false;
4360}
4361/// Return true when there is potentially a faster code sequence for an
4362/// instruction chain ending in \p Root. All potential patterns are listed in
4363/// the \p Pattern vector. Pattern should be sorted in priority order since the
4364/// pattern evaluator stops checking as soon as it finds a faster sequence.
4365
4366bool AArch64InstrInfo::getMachineCombinerPatterns(
4367    MachineInstr &Root,
4368    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4369  // Integer patterns
4370  if (getMaddPatterns(Root, Patterns))
4371    return true;
4372  // Floating point patterns
4373  if (getFMAPatterns(Root, Patterns))
4374    return true;
4375
4376  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4377}
4378
4379enum class FMAInstKind { Default, Indexed, Accumulator };
4380/// genFusedMultiply - Generate fused multiply instructions.
4381/// This function supports both integer and floating point instructions.
4382/// A typical example:
4383///  F|MUL I=A,B,0
4384///  F|ADD R,I,C
4385///  ==> F|MADD R,A,B,C
4386/// \param MF Containing MachineFunction
4387/// \param MRI Register information
4388/// \param TII Target information
4389/// \param Root is the F|ADD instruction
4390/// \param [out] InsInstrs is a vector of machine instructions and will
4391/// contain the generated madd instruction
4392/// \param IdxMulOpd is index of operand in Root that is the result of
4393/// the F|MUL. In the example above IdxMulOpd is 1.
4394/// \param MaddOpc the opcode fo the f|madd instruction
4395/// \param RC Register class of operands
4396/// \param kind of fma instruction (addressing mode) to be generated
4397/// \param ReplacedAddend is the result register from the instruction
4398/// replacing the non-combined operand, if any.
4399static MachineInstr *
4400genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4401                 const TargetInstrInfo *TII, MachineInstr &Root,
4402                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4403                 unsigned MaddOpc, const TargetRegisterClass *RC,
4404                 FMAInstKind kind = FMAInstKind::Default,
4405                 const Register *ReplacedAddend = nullptr) {
4406  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4407
4408  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4409  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4410  Register ResultReg = Root.getOperand(0).getReg();
4411  Register SrcReg0 = MUL->getOperand(1).getReg();
4412  bool Src0IsKill = MUL->getOperand(1).isKill();
4413  Register SrcReg1 = MUL->getOperand(2).getReg();
4414  bool Src1IsKill = MUL->getOperand(2).isKill();
4415
4416  unsigned SrcReg2;
4417  bool Src2IsKill;
4418  if (ReplacedAddend) {
4419    // If we just generated a new addend, we must be it's only use.
4420    SrcReg2 = *ReplacedAddend;
4421    Src2IsKill = true;
4422  } else {
4423    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4424    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4425  }
4426
4427  if (Register::isVirtualRegister(ResultReg))
4428    MRI.constrainRegClass(ResultReg, RC);
4429  if (Register::isVirtualRegister(SrcReg0))
4430    MRI.constrainRegClass(SrcReg0, RC);
4431  if (Register::isVirtualRegister(SrcReg1))
4432    MRI.constrainRegClass(SrcReg1, RC);
4433  if (Register::isVirtualRegister(SrcReg2))
4434    MRI.constrainRegClass(SrcReg2, RC);
4435
4436  MachineInstrBuilder MIB;
4437  if (kind == FMAInstKind::Default)
4438    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4439              .addReg(SrcReg0, getKillRegState(Src0IsKill))
4440              .addReg(SrcReg1, getKillRegState(Src1IsKill))
4441              .addReg(SrcReg2, getKillRegState(Src2IsKill));
4442  else if (kind == FMAInstKind::Indexed)
4443    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4444              .addReg(SrcReg2, getKillRegState(Src2IsKill))
4445              .addReg(SrcReg0, getKillRegState(Src0IsKill))
4446              .addReg(SrcReg1, getKillRegState(Src1IsKill))
4447              .addImm(MUL->getOperand(3).getImm());
4448  else if (kind == FMAInstKind::Accumulator)
4449    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4450              .addReg(SrcReg2, getKillRegState(Src2IsKill))
4451              .addReg(SrcReg0, getKillRegState(Src0IsKill))
4452              .addReg(SrcReg1, getKillRegState(Src1IsKill));
4453  else
4454    assert(false && "Invalid FMA instruction kind \n");
4455  // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
4456  InsInstrs.push_back(MIB);
4457  return MUL;
4458}
4459
4460/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
4461/// instructions.
4462///
4463/// \see genFusedMultiply
4464static MachineInstr *genFusedMultiplyAcc(
4465    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4466    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4467    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4468  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4469                          FMAInstKind::Accumulator);
4470}
4471
4472/// genNeg - Helper to generate an intermediate negation of the second operand
4473/// of Root
4474static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
4475                       const TargetInstrInfo *TII, MachineInstr &Root,
4476                       SmallVectorImpl<MachineInstr *> &InsInstrs,
4477                       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
4478                       unsigned MnegOpc, const TargetRegisterClass *RC) {
4479  Register NewVR = MRI.createVirtualRegister(RC);
4480  MachineInstrBuilder MIB =
4481      BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
4482          .add(Root.getOperand(2));
4483  InsInstrs.push_back(MIB);
4484
4485  assert(InstrIdxForVirtReg.empty());
4486  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4487
4488  return NewVR;
4489}
4490
4491/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4492/// instructions with an additional negation of the accumulator
4493static MachineInstr *genFusedMultiplyAccNeg(
4494    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4495    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4496    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4497    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4498  assert(IdxMulOpd == 1);
4499
4500  Register NewVR =
4501      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4502  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4503                          FMAInstKind::Accumulator, &NewVR);
4504}
4505
4506/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
4507/// instructions.
4508///
4509/// \see genFusedMultiply
4510static MachineInstr *genFusedMultiplyIdx(
4511    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4512    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4513    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4514  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4515                          FMAInstKind::Indexed);
4516}
4517
4518/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4519/// instructions with an additional negation of the accumulator
4520static MachineInstr *genFusedMultiplyIdxNeg(
4521    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4522    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4523    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4524    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4525  assert(IdxMulOpd == 1);
4526
4527  Register NewVR =
4528      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4529
4530  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4531                          FMAInstKind::Indexed, &NewVR);
4532}
4533
4534/// genMaddR - Generate madd instruction and combine mul and add using
4535/// an extra virtual register
4536/// Example - an ADD intermediate needs to be stored in a register:
4537///   MUL I=A,B,0
4538///   ADD R,I,Imm
4539///   ==> ORR  V, ZR, Imm
4540///   ==> MADD R,A,B,V
4541/// \param MF Containing MachineFunction
4542/// \param MRI Register information
4543/// \param TII Target information
4544/// \param Root is the ADD instruction
4545/// \param [out] InsInstrs is a vector of machine instructions and will
4546/// contain the generated madd instruction
4547/// \param IdxMulOpd is index of operand in Root that is the result of
4548/// the MUL. In the example above IdxMulOpd is 1.
4549/// \param MaddOpc the opcode fo the madd instruction
4550/// \param VR is a virtual register that holds the value of an ADD operand
4551/// (V in the example above).
4552/// \param RC Register class of operands
4553static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4554                              const TargetInstrInfo *TII, MachineInstr &Root,
4555                              SmallVectorImpl<MachineInstr *> &InsInstrs,
4556                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4557                              const TargetRegisterClass *RC) {
4558  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4559
4560  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4561  Register ResultReg = Root.getOperand(0).getReg();
4562  Register SrcReg0 = MUL->getOperand(1).getReg();
4563  bool Src0IsKill = MUL->getOperand(1).isKill();
4564  Register SrcReg1 = MUL->getOperand(2).getReg();
4565  bool Src1IsKill = MUL->getOperand(2).isKill();
4566
4567  if (Register::isVirtualRegister(ResultReg))
4568    MRI.constrainRegClass(ResultReg, RC);
4569  if (Register::isVirtualRegister(SrcReg0))
4570    MRI.constrainRegClass(SrcReg0, RC);
4571  if (Register::isVirtualRegister(SrcReg1))
4572    MRI.constrainRegClass(SrcReg1, RC);
4573  if (Register::isVirtualRegister(VR))
4574    MRI.constrainRegClass(VR, RC);
4575
4576  MachineInstrBuilder MIB =
4577      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4578          .addReg(SrcReg0, getKillRegState(Src0IsKill))
4579          .addReg(SrcReg1, getKillRegState(Src1IsKill))
4580          .addReg(VR);
4581  // Insert the MADD
4582  InsInstrs.push_back(MIB);
4583  return MUL;
4584}
4585
4586/// When getMachineCombinerPatterns() finds potential patterns,
4587/// this function generates the instructions that could replace the
4588/// original code sequence
4589void AArch64InstrInfo::genAlternativeCodeSequence(
4590    MachineInstr &Root, MachineCombinerPattern Pattern,
4591    SmallVectorImpl<MachineInstr *> &InsInstrs,
4592    SmallVectorImpl<MachineInstr *> &DelInstrs,
4593    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4594  MachineBasicBlock &MBB = *Root.getParent();
4595  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4596  MachineFunction &MF = *MBB.getParent();
4597  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4598
4599  MachineInstr *MUL;
4600  const TargetRegisterClass *RC;
4601  unsigned Opc;
4602  switch (Pattern) {
4603  default:
4604    // Reassociate instructions.
4605    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4606                                                DelInstrs, InstrIdxForVirtReg);
4607    return;
4608  case MachineCombinerPattern::MULADDW_OP1:
4609  case MachineCombinerPattern::MULADDX_OP1:
4610    // MUL I=A,B,0
4611    // ADD R,I,C
4612    // ==> MADD R,A,B,C
4613    // --- Create(MADD);
4614    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4615      Opc = AArch64::MADDWrrr;
4616      RC = &AArch64::GPR32RegClass;
4617    } else {
4618      Opc = AArch64::MADDXrrr;
4619      RC = &AArch64::GPR64RegClass;
4620    }
4621    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4622    break;
4623  case MachineCombinerPattern::MULADDW_OP2:
4624  case MachineCombinerPattern::MULADDX_OP2:
4625    // MUL I=A,B,0
4626    // ADD R,C,I
4627    // ==> MADD R,A,B,C
4628    // --- Create(MADD);
4629    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4630      Opc = AArch64::MADDWrrr;
4631      RC = &AArch64::GPR32RegClass;
4632    } else {
4633      Opc = AArch64::MADDXrrr;
4634      RC = &AArch64::GPR64RegClass;
4635    }
4636    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4637    break;
4638  case MachineCombinerPattern::MULADDWI_OP1:
4639  case MachineCombinerPattern::MULADDXI_OP1: {
4640    // MUL I=A,B,0
4641    // ADD R,I,Imm
4642    // ==> ORR  V, ZR, Imm
4643    // ==> MADD R,A,B,V
4644    // --- Create(MADD);
4645    const TargetRegisterClass *OrrRC;
4646    unsigned BitSize, OrrOpc, ZeroReg;
4647    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4648      OrrOpc = AArch64::ORRWri;
4649      OrrRC = &AArch64::GPR32spRegClass;
4650      BitSize = 32;
4651      ZeroReg = AArch64::WZR;
4652      Opc = AArch64::MADDWrrr;
4653      RC = &AArch64::GPR32RegClass;
4654    } else {
4655      OrrOpc = AArch64::ORRXri;
4656      OrrRC = &AArch64::GPR64spRegClass;
4657      BitSize = 64;
4658      ZeroReg = AArch64::XZR;
4659      Opc = AArch64::MADDXrrr;
4660      RC = &AArch64::GPR64RegClass;
4661    }
4662    Register NewVR = MRI.createVirtualRegister(OrrRC);
4663    uint64_t Imm = Root.getOperand(2).getImm();
4664
4665    if (Root.getOperand(3).isImm()) {
4666      unsigned Val = Root.getOperand(3).getImm();
4667      Imm = Imm << Val;
4668    }
4669    uint64_t UImm = SignExtend64(Imm, BitSize);
4670    uint64_t Encoding;
4671    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4672      MachineInstrBuilder MIB1 =
4673          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4674              .addReg(ZeroReg)
4675              .addImm(Encoding);
4676      InsInstrs.push_back(MIB1);
4677      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4678      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4679    }
4680    break;
4681  }
4682  case MachineCombinerPattern::MULSUBW_OP1:
4683  case MachineCombinerPattern::MULSUBX_OP1: {
4684    // MUL I=A,B,0
4685    // SUB R,I, C
4686    // ==> SUB  V, 0, C
4687    // ==> MADD R,A,B,V // = -C + A*B
4688    // --- Create(MADD);
4689    const TargetRegisterClass *SubRC;
4690    unsigned SubOpc, ZeroReg;
4691    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4692      SubOpc = AArch64::SUBWrr;
4693      SubRC = &AArch64::GPR32spRegClass;
4694      ZeroReg = AArch64::WZR;
4695      Opc = AArch64::MADDWrrr;
4696      RC = &AArch64::GPR32RegClass;
4697    } else {
4698      SubOpc = AArch64::SUBXrr;
4699      SubRC = &AArch64::GPR64spRegClass;
4700      ZeroReg = AArch64::XZR;
4701      Opc = AArch64::MADDXrrr;
4702      RC = &AArch64::GPR64RegClass;
4703    }
4704    Register NewVR = MRI.createVirtualRegister(SubRC);
4705    // SUB NewVR, 0, C
4706    MachineInstrBuilder MIB1 =
4707        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4708            .addReg(ZeroReg)
4709            .add(Root.getOperand(2));
4710    InsInstrs.push_back(MIB1);
4711    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4712    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4713    break;
4714  }
4715  case MachineCombinerPattern::MULSUBW_OP2:
4716  case MachineCombinerPattern::MULSUBX_OP2:
4717    // MUL I=A,B,0
4718    // SUB R,C,I
4719    // ==> MSUB R,A,B,C (computes C - A*B)
4720    // --- Create(MSUB);
4721    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4722      Opc = AArch64::MSUBWrrr;
4723      RC = &AArch64::GPR32RegClass;
4724    } else {
4725      Opc = AArch64::MSUBXrrr;
4726      RC = &AArch64::GPR64RegClass;
4727    }
4728    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4729    break;
4730  case MachineCombinerPattern::MULSUBWI_OP1:
4731  case MachineCombinerPattern::MULSUBXI_OP1: {
4732    // MUL I=A,B,0
4733    // SUB R,I, Imm
4734    // ==> ORR  V, ZR, -Imm
4735    // ==> MADD R,A,B,V // = -Imm + A*B
4736    // --- Create(MADD);
4737    const TargetRegisterClass *OrrRC;
4738    unsigned BitSize, OrrOpc, ZeroReg;
4739    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4740      OrrOpc = AArch64::ORRWri;
4741      OrrRC = &AArch64::GPR32spRegClass;
4742      BitSize = 32;
4743      ZeroReg = AArch64::WZR;
4744      Opc = AArch64::MADDWrrr;
4745      RC = &AArch64::GPR32RegClass;
4746    } else {
4747      OrrOpc = AArch64::ORRXri;
4748      OrrRC = &AArch64::GPR64spRegClass;
4749      BitSize = 64;
4750      ZeroReg = AArch64::XZR;
4751      Opc = AArch64::MADDXrrr;
4752      RC = &AArch64::GPR64RegClass;
4753    }
4754    Register NewVR = MRI.createVirtualRegister(OrrRC);
4755    uint64_t Imm = Root.getOperand(2).getImm();
4756    if (Root.getOperand(3).isImm()) {
4757      unsigned Val = Root.getOperand(3).getImm();
4758      Imm = Imm << Val;
4759    }
4760    uint64_t UImm = SignExtend64(-Imm, BitSize);
4761    uint64_t Encoding;
4762    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4763      MachineInstrBuilder MIB1 =
4764          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4765              .addReg(ZeroReg)
4766              .addImm(Encoding);
4767      InsInstrs.push_back(MIB1);
4768      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4769      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4770    }
4771    break;
4772  }
4773
4774  case MachineCombinerPattern::MULADDv8i8_OP1:
4775    Opc = AArch64::MLAv8i8;
4776    RC = &AArch64::FPR64RegClass;
4777    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4778    break;
4779  case MachineCombinerPattern::MULADDv8i8_OP2:
4780    Opc = AArch64::MLAv8i8;
4781    RC = &AArch64::FPR64RegClass;
4782    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4783    break;
4784  case MachineCombinerPattern::MULADDv16i8_OP1:
4785    Opc = AArch64::MLAv16i8;
4786    RC = &AArch64::FPR128RegClass;
4787    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4788    break;
4789  case MachineCombinerPattern::MULADDv16i8_OP2:
4790    Opc = AArch64::MLAv16i8;
4791    RC = &AArch64::FPR128RegClass;
4792    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4793    break;
4794  case MachineCombinerPattern::MULADDv4i16_OP1:
4795    Opc = AArch64::MLAv4i16;
4796    RC = &AArch64::FPR64RegClass;
4797    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4798    break;
4799  case MachineCombinerPattern::MULADDv4i16_OP2:
4800    Opc = AArch64::MLAv4i16;
4801    RC = &AArch64::FPR64RegClass;
4802    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4803    break;
4804  case MachineCombinerPattern::MULADDv8i16_OP1:
4805    Opc = AArch64::MLAv8i16;
4806    RC = &AArch64::FPR128RegClass;
4807    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4808    break;
4809  case MachineCombinerPattern::MULADDv8i16_OP2:
4810    Opc = AArch64::MLAv8i16;
4811    RC = &AArch64::FPR128RegClass;
4812    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4813    break;
4814  case MachineCombinerPattern::MULADDv2i32_OP1:
4815    Opc = AArch64::MLAv2i32;
4816    RC = &AArch64::FPR64RegClass;
4817    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4818    break;
4819  case MachineCombinerPattern::MULADDv2i32_OP2:
4820    Opc = AArch64::MLAv2i32;
4821    RC = &AArch64::FPR64RegClass;
4822    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4823    break;
4824  case MachineCombinerPattern::MULADDv4i32_OP1:
4825    Opc = AArch64::MLAv4i32;
4826    RC = &AArch64::FPR128RegClass;
4827    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4828    break;
4829  case MachineCombinerPattern::MULADDv4i32_OP2:
4830    Opc = AArch64::MLAv4i32;
4831    RC = &AArch64::FPR128RegClass;
4832    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4833    break;
4834
4835  case MachineCombinerPattern::MULSUBv8i8_OP1:
4836    Opc = AArch64::MLAv8i8;
4837    RC = &AArch64::FPR64RegClass;
4838    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4839                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
4840                                 RC);
4841    break;
4842  case MachineCombinerPattern::MULSUBv8i8_OP2:
4843    Opc = AArch64::MLSv8i8;
4844    RC = &AArch64::FPR64RegClass;
4845    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4846    break;
4847  case MachineCombinerPattern::MULSUBv16i8_OP1:
4848    Opc = AArch64::MLAv16i8;
4849    RC = &AArch64::FPR128RegClass;
4850    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4851                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
4852                                 RC);
4853    break;
4854  case MachineCombinerPattern::MULSUBv16i8_OP2:
4855    Opc = AArch64::MLSv16i8;
4856    RC = &AArch64::FPR128RegClass;
4857    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4858    break;
4859  case MachineCombinerPattern::MULSUBv4i16_OP1:
4860    Opc = AArch64::MLAv4i16;
4861    RC = &AArch64::FPR64RegClass;
4862    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4863                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4864                                 RC);
4865    break;
4866  case MachineCombinerPattern::MULSUBv4i16_OP2:
4867    Opc = AArch64::MLSv4i16;
4868    RC = &AArch64::FPR64RegClass;
4869    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4870    break;
4871  case MachineCombinerPattern::MULSUBv8i16_OP1:
4872    Opc = AArch64::MLAv8i16;
4873    RC = &AArch64::FPR128RegClass;
4874    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4875                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4876                                 RC);
4877    break;
4878  case MachineCombinerPattern::MULSUBv8i16_OP2:
4879    Opc = AArch64::MLSv8i16;
4880    RC = &AArch64::FPR128RegClass;
4881    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4882    break;
4883  case MachineCombinerPattern::MULSUBv2i32_OP1:
4884    Opc = AArch64::MLAv2i32;
4885    RC = &AArch64::FPR64RegClass;
4886    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4887                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4888                                 RC);
4889    break;
4890  case MachineCombinerPattern::MULSUBv2i32_OP2:
4891    Opc = AArch64::MLSv2i32;
4892    RC = &AArch64::FPR64RegClass;
4893    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4894    break;
4895  case MachineCombinerPattern::MULSUBv4i32_OP1:
4896    Opc = AArch64::MLAv4i32;
4897    RC = &AArch64::FPR128RegClass;
4898    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4899                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4900                                 RC);
4901    break;
4902  case MachineCombinerPattern::MULSUBv4i32_OP2:
4903    Opc = AArch64::MLSv4i32;
4904    RC = &AArch64::FPR128RegClass;
4905    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4906    break;
4907
4908  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4909    Opc = AArch64::MLAv4i16_indexed;
4910    RC = &AArch64::FPR64RegClass;
4911    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4912    break;
4913  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4914    Opc = AArch64::MLAv4i16_indexed;
4915    RC = &AArch64::FPR64RegClass;
4916    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4917    break;
4918  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4919    Opc = AArch64::MLAv8i16_indexed;
4920    RC = &AArch64::FPR128RegClass;
4921    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4922    break;
4923  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4924    Opc = AArch64::MLAv8i16_indexed;
4925    RC = &AArch64::FPR128RegClass;
4926    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4927    break;
4928  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4929    Opc = AArch64::MLAv2i32_indexed;
4930    RC = &AArch64::FPR64RegClass;
4931    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4932    break;
4933  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4934    Opc = AArch64::MLAv2i32_indexed;
4935    RC = &AArch64::FPR64RegClass;
4936    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4937    break;
4938  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4939    Opc = AArch64::MLAv4i32_indexed;
4940    RC = &AArch64::FPR128RegClass;
4941    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4942    break;
4943  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4944    Opc = AArch64::MLAv4i32_indexed;
4945    RC = &AArch64::FPR128RegClass;
4946    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4947    break;
4948
4949  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4950    Opc = AArch64::MLAv4i16_indexed;
4951    RC = &AArch64::FPR64RegClass;
4952    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4953                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4954                                 RC);
4955    break;
4956  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4957    Opc = AArch64::MLSv4i16_indexed;
4958    RC = &AArch64::FPR64RegClass;
4959    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4960    break;
4961  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4962    Opc = AArch64::MLAv8i16_indexed;
4963    RC = &AArch64::FPR128RegClass;
4964    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4965                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4966                                 RC);
4967    break;
4968  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4969    Opc = AArch64::MLSv8i16_indexed;
4970    RC = &AArch64::FPR128RegClass;
4971    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4972    break;
4973  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4974    Opc = AArch64::MLAv2i32_indexed;
4975    RC = &AArch64::FPR64RegClass;
4976    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4977                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4978                                 RC);
4979    break;
4980  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4981    Opc = AArch64::MLSv2i32_indexed;
4982    RC = &AArch64::FPR64RegClass;
4983    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4984    break;
4985  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4986    Opc = AArch64::MLAv4i32_indexed;
4987    RC = &AArch64::FPR128RegClass;
4988    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4989                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4990                                 RC);
4991    break;
4992  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4993    Opc = AArch64::MLSv4i32_indexed;
4994    RC = &AArch64::FPR128RegClass;
4995    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4996    break;
4997
4998  // Floating Point Support
4999  case MachineCombinerPattern::FMULADDH_OP1:
5000    Opc = AArch64::FMADDHrrr;
5001    RC = &AArch64::FPR16RegClass;
5002    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5003    break;
5004  case MachineCombinerPattern::FMULADDS_OP1:
5005    Opc = AArch64::FMADDSrrr;
5006    RC = &AArch64::FPR32RegClass;
5007    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5008    break;
5009  case MachineCombinerPattern::FMULADDD_OP1:
5010    Opc = AArch64::FMADDDrrr;
5011    RC = &AArch64::FPR64RegClass;
5012    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5013    break;
5014
5015  case MachineCombinerPattern::FMULADDH_OP2:
5016    Opc = AArch64::FMADDHrrr;
5017    RC = &AArch64::FPR16RegClass;
5018    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5019    break;
5020  case MachineCombinerPattern::FMULADDS_OP2:
5021    Opc = AArch64::FMADDSrrr;
5022    RC = &AArch64::FPR32RegClass;
5023    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5024    break;
5025  case MachineCombinerPattern::FMULADDD_OP2:
5026    Opc = AArch64::FMADDDrrr;
5027    RC = &AArch64::FPR64RegClass;
5028    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5029    break;
5030
5031  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
5032    Opc = AArch64::FMLAv1i32_indexed;
5033    RC = &AArch64::FPR32RegClass;
5034    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5035                           FMAInstKind::Indexed);
5036    break;
5037  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
5038    Opc = AArch64::FMLAv1i32_indexed;
5039    RC = &AArch64::FPR32RegClass;
5040    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5041                           FMAInstKind::Indexed);
5042    break;
5043
5044  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
5045    Opc = AArch64::FMLAv1i64_indexed;
5046    RC = &AArch64::FPR64RegClass;
5047    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5048                           FMAInstKind::Indexed);
5049    break;
5050  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
5051    Opc = AArch64::FMLAv1i64_indexed;
5052    RC = &AArch64::FPR64RegClass;
5053    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5054                           FMAInstKind::Indexed);
5055    break;
5056
5057  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
5058    RC = &AArch64::FPR64RegClass;
5059    Opc = AArch64::FMLAv4i16_indexed;
5060    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5061                           FMAInstKind::Indexed);
5062    break;
5063  case MachineCombinerPattern::FMLAv4f16_OP1:
5064    RC = &AArch64::FPR64RegClass;
5065    Opc = AArch64::FMLAv4f16;
5066    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5067                           FMAInstKind::Accumulator);
5068    break;
5069  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
5070    RC = &AArch64::FPR64RegClass;
5071    Opc = AArch64::FMLAv4i16_indexed;
5072    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5073                           FMAInstKind::Indexed);
5074    break;
5075  case MachineCombinerPattern::FMLAv4f16_OP2:
5076    RC = &AArch64::FPR64RegClass;
5077    Opc = AArch64::FMLAv4f16;
5078    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5079                           FMAInstKind::Accumulator);
5080    break;
5081
5082  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
5083  case MachineCombinerPattern::FMLAv2f32_OP1:
5084    RC = &AArch64::FPR64RegClass;
5085    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
5086      Opc = AArch64::FMLAv2i32_indexed;
5087      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5088                             FMAInstKind::Indexed);
5089    } else {
5090      Opc = AArch64::FMLAv2f32;
5091      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5092                             FMAInstKind::Accumulator);
5093    }
5094    break;
5095  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
5096  case MachineCombinerPattern::FMLAv2f32_OP2:
5097    RC = &AArch64::FPR64RegClass;
5098    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
5099      Opc = AArch64::FMLAv2i32_indexed;
5100      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5101                             FMAInstKind::Indexed);
5102    } else {
5103      Opc = AArch64::FMLAv2f32;
5104      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5105                             FMAInstKind::Accumulator);
5106    }
5107    break;
5108
5109  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
5110    RC = &AArch64::FPR128RegClass;
5111    Opc = AArch64::FMLAv8i16_indexed;
5112    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5113                           FMAInstKind::Indexed);
5114    break;
5115  case MachineCombinerPattern::FMLAv8f16_OP1:
5116    RC = &AArch64::FPR128RegClass;
5117    Opc = AArch64::FMLAv8f16;
5118    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5119                           FMAInstKind::Accumulator);
5120    break;
5121  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
5122    RC = &AArch64::FPR128RegClass;
5123    Opc = AArch64::FMLAv8i16_indexed;
5124    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5125                           FMAInstKind::Indexed);
5126    break;
5127  case MachineCombinerPattern::FMLAv8f16_OP2:
5128    RC = &AArch64::FPR128RegClass;
5129    Opc = AArch64::FMLAv8f16;
5130    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5131                           FMAInstKind::Accumulator);
5132    break;
5133
5134  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
5135  case MachineCombinerPattern::FMLAv2f64_OP1:
5136    RC = &AArch64::FPR128RegClass;
5137    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
5138      Opc = AArch64::FMLAv2i64_indexed;
5139      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5140                             FMAInstKind::Indexed);
5141    } else {
5142      Opc = AArch64::FMLAv2f64;
5143      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5144                             FMAInstKind::Accumulator);
5145    }
5146    break;
5147  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
5148  case MachineCombinerPattern::FMLAv2f64_OP2:
5149    RC = &AArch64::FPR128RegClass;
5150    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
5151      Opc = AArch64::FMLAv2i64_indexed;
5152      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5153                             FMAInstKind::Indexed);
5154    } else {
5155      Opc = AArch64::FMLAv2f64;
5156      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5157                             FMAInstKind::Accumulator);
5158    }
5159    break;
5160
5161  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
5162  case MachineCombinerPattern::FMLAv4f32_OP1:
5163    RC = &AArch64::FPR128RegClass;
5164    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
5165      Opc = AArch64::FMLAv4i32_indexed;
5166      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5167                             FMAInstKind::Indexed);
5168    } else {
5169      Opc = AArch64::FMLAv4f32;
5170      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5171                             FMAInstKind::Accumulator);
5172    }
5173    break;
5174
5175  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
5176  case MachineCombinerPattern::FMLAv4f32_OP2:
5177    RC = &AArch64::FPR128RegClass;
5178    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
5179      Opc = AArch64::FMLAv4i32_indexed;
5180      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5181                             FMAInstKind::Indexed);
5182    } else {
5183      Opc = AArch64::FMLAv4f32;
5184      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5185                             FMAInstKind::Accumulator);
5186    }
5187    break;
5188
5189  case MachineCombinerPattern::FMULSUBH_OP1:
5190    Opc = AArch64::FNMSUBHrrr;
5191    RC = &AArch64::FPR16RegClass;
5192    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5193    break;
5194  case MachineCombinerPattern::FMULSUBS_OP1:
5195    Opc = AArch64::FNMSUBSrrr;
5196    RC = &AArch64::FPR32RegClass;
5197    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5198    break;
5199  case MachineCombinerPattern::FMULSUBD_OP1:
5200    Opc = AArch64::FNMSUBDrrr;
5201    RC = &AArch64::FPR64RegClass;
5202    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5203    break;
5204
5205  case MachineCombinerPattern::FNMULSUBH_OP1:
5206    Opc = AArch64::FNMADDHrrr;
5207    RC = &AArch64::FPR16RegClass;
5208    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5209    break;
5210  case MachineCombinerPattern::FNMULSUBS_OP1:
5211    Opc = AArch64::FNMADDSrrr;
5212    RC = &AArch64::FPR32RegClass;
5213    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5214    break;
5215  case MachineCombinerPattern::FNMULSUBD_OP1:
5216    Opc = AArch64::FNMADDDrrr;
5217    RC = &AArch64::FPR64RegClass;
5218    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5219    break;
5220
5221  case MachineCombinerPattern::FMULSUBH_OP2:
5222    Opc = AArch64::FMSUBHrrr;
5223    RC = &AArch64::FPR16RegClass;
5224    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5225    break;
5226  case MachineCombinerPattern::FMULSUBS_OP2:
5227    Opc = AArch64::FMSUBSrrr;
5228    RC = &AArch64::FPR32RegClass;
5229    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5230    break;
5231  case MachineCombinerPattern::FMULSUBD_OP2:
5232    Opc = AArch64::FMSUBDrrr;
5233    RC = &AArch64::FPR64RegClass;
5234    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5235    break;
5236
5237  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5238    Opc = AArch64::FMLSv1i32_indexed;
5239    RC = &AArch64::FPR32RegClass;
5240    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5241                           FMAInstKind::Indexed);
5242    break;
5243
5244  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5245    Opc = AArch64::FMLSv1i64_indexed;
5246    RC = &AArch64::FPR64RegClass;
5247    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5248                           FMAInstKind::Indexed);
5249    break;
5250
5251  case MachineCombinerPattern::FMLSv4f16_OP1:
5252  case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
5253    RC = &AArch64::FPR64RegClass;
5254    Register NewVR = MRI.createVirtualRegister(RC);
5255    MachineInstrBuilder MIB1 =
5256        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
5257            .add(Root.getOperand(2));
5258    InsInstrs.push_back(MIB1);
5259    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5260    if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
5261      Opc = AArch64::FMLAv4f16;
5262      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5263                             FMAInstKind::Accumulator, &NewVR);
5264    } else {
5265      Opc = AArch64::FMLAv4i16_indexed;
5266      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5267                             FMAInstKind::Indexed, &NewVR);
5268    }
5269    break;
5270  }
5271  case MachineCombinerPattern::FMLSv4f16_OP2:
5272    RC = &AArch64::FPR64RegClass;
5273    Opc = AArch64::FMLSv4f16;
5274    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5275                           FMAInstKind::Accumulator);
5276    break;
5277  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
5278    RC = &AArch64::FPR64RegClass;
5279    Opc = AArch64::FMLSv4i16_indexed;
5280    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5281                           FMAInstKind::Indexed);
5282    break;
5283
5284  case MachineCombinerPattern::FMLSv2f32_OP2:
5285  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
5286    RC = &AArch64::FPR64RegClass;
5287    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
5288      Opc = AArch64::FMLSv2i32_indexed;
5289      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5290                             FMAInstKind::Indexed);
5291    } else {
5292      Opc = AArch64::FMLSv2f32;
5293      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5294                             FMAInstKind::Accumulator);
5295    }
5296    break;
5297
5298  case MachineCombinerPattern::FMLSv8f16_OP1:
5299  case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
5300    RC = &AArch64::FPR128RegClass;
5301    Register NewVR = MRI.createVirtualRegister(RC);
5302    MachineInstrBuilder MIB1 =
5303        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
5304            .add(Root.getOperand(2));
5305    InsInstrs.push_back(MIB1);
5306    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5307    if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
5308      Opc = AArch64::FMLAv8f16;
5309      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5310                             FMAInstKind::Accumulator, &NewVR);
5311    } else {
5312      Opc = AArch64::FMLAv8i16_indexed;
5313      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5314                             FMAInstKind::Indexed, &NewVR);
5315    }
5316    break;
5317  }
5318  case MachineCombinerPattern::FMLSv8f16_OP2:
5319    RC = &AArch64::FPR128RegClass;
5320    Opc = AArch64::FMLSv8f16;
5321    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5322                           FMAInstKind::Accumulator);
5323    break;
5324  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
5325    RC = &AArch64::FPR128RegClass;
5326    Opc = AArch64::FMLSv8i16_indexed;
5327    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5328                           FMAInstKind::Indexed);
5329    break;
5330
5331  case MachineCombinerPattern::FMLSv2f64_OP2:
5332  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
5333    RC = &AArch64::FPR128RegClass;
5334    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
5335      Opc = AArch64::FMLSv2i64_indexed;
5336      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5337                             FMAInstKind::Indexed);
5338    } else {
5339      Opc = AArch64::FMLSv2f64;
5340      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5341                             FMAInstKind::Accumulator);
5342    }
5343    break;
5344
5345  case MachineCombinerPattern::FMLSv4f32_OP2:
5346  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
5347    RC = &AArch64::FPR128RegClass;
5348    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
5349      Opc = AArch64::FMLSv4i32_indexed;
5350      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5351                             FMAInstKind::Indexed);
5352    } else {
5353      Opc = AArch64::FMLSv4f32;
5354      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5355                             FMAInstKind::Accumulator);
5356    }
5357    break;
5358  case MachineCombinerPattern::FMLSv2f32_OP1:
5359  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
5360    RC = &AArch64::FPR64RegClass;
5361    Register NewVR = MRI.createVirtualRegister(RC);
5362    MachineInstrBuilder MIB1 =
5363        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
5364            .add(Root.getOperand(2));
5365    InsInstrs.push_back(MIB1);
5366    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5367    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
5368      Opc = AArch64::FMLAv2i32_indexed;
5369      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5370                             FMAInstKind::Indexed, &NewVR);
5371    } else {
5372      Opc = AArch64::FMLAv2f32;
5373      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5374                             FMAInstKind::Accumulator, &NewVR);
5375    }
5376    break;
5377  }
5378  case MachineCombinerPattern::FMLSv4f32_OP1:
5379  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
5380    RC = &AArch64::FPR128RegClass;
5381    Register NewVR = MRI.createVirtualRegister(RC);
5382    MachineInstrBuilder MIB1 =
5383        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
5384            .add(Root.getOperand(2));
5385    InsInstrs.push_back(MIB1);
5386    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5387    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
5388      Opc = AArch64::FMLAv4i32_indexed;
5389      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5390                             FMAInstKind::Indexed, &NewVR);
5391    } else {
5392      Opc = AArch64::FMLAv4f32;
5393      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5394                             FMAInstKind::Accumulator, &NewVR);
5395    }
5396    break;
5397  }
5398  case MachineCombinerPattern::FMLSv2f64_OP1:
5399  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
5400    RC = &AArch64::FPR128RegClass;
5401    Register NewVR = MRI.createVirtualRegister(RC);
5402    MachineInstrBuilder MIB1 =
5403        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
5404            .add(Root.getOperand(2));
5405    InsInstrs.push_back(MIB1);
5406    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5407    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
5408      Opc = AArch64::FMLAv2i64_indexed;
5409      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5410                             FMAInstKind::Indexed, &NewVR);
5411    } else {
5412      Opc = AArch64::FMLAv2f64;
5413      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5414                             FMAInstKind::Accumulator, &NewVR);
5415    }
5416    break;
5417  }
5418  } // end switch (Pattern)
5419  // Record MUL and ADD/SUB for deletion
5420  DelInstrs.push_back(MUL);
5421  DelInstrs.push_back(&Root);
5422}
5423
5424/// Replace csincr-branch sequence by simple conditional branch
5425///
5426/// Examples:
5427/// 1. \code
5428///   csinc  w9, wzr, wzr, <condition code>
5429///   tbnz   w9, #0, 0x44
5430///    \endcode
5431/// to
5432///    \code
5433///   b.<inverted condition code>
5434///    \endcode
5435///
5436/// 2. \code
5437///   csinc w9, wzr, wzr, <condition code>
5438///   tbz   w9, #0, 0x44
5439///    \endcode
5440/// to
5441///    \code
5442///   b.<condition code>
5443///    \endcode
5444///
5445/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
5446/// compare's constant operand is power of 2.
5447///
5448/// Examples:
5449///    \code
5450///   and  w8, w8, #0x400
5451///   cbnz w8, L1
5452///    \endcode
5453/// to
5454///    \code
5455///   tbnz w8, #10, L1
5456///    \endcode
5457///
5458/// \param  MI Conditional Branch
5459/// \return True when the simple conditional branch is generated
5460///
5461bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
5462  bool IsNegativeBranch = false;
5463  bool IsTestAndBranch = false;
5464  unsigned TargetBBInMI = 0;
5465  switch (MI.getOpcode()) {
5466  default:
5467    llvm_unreachable("Unknown branch instruction?");
5468  case AArch64::Bcc:
5469    return false;
5470  case AArch64::CBZW:
5471  case AArch64::CBZX:
5472    TargetBBInMI = 1;
5473    break;
5474  case AArch64::CBNZW:
5475  case AArch64::CBNZX:
5476    TargetBBInMI = 1;
5477    IsNegativeBranch = true;
5478    break;
5479  case AArch64::TBZW:
5480  case AArch64::TBZX:
5481    TargetBBInMI = 2;
5482    IsTestAndBranch = true;
5483    break;
5484  case AArch64::TBNZW:
5485  case AArch64::TBNZX:
5486    TargetBBInMI = 2;
5487    IsNegativeBranch = true;
5488    IsTestAndBranch = true;
5489    break;
5490  }
5491  // So we increment a zero register and test for bits other
5492  // than bit 0? Conservatively bail out in case the verifier
5493  // missed this case.
5494  if (IsTestAndBranch && MI.getOperand(1).getImm())
5495    return false;
5496
5497  // Find Definition.
5498  assert(MI.getParent() && "Incomplete machine instruciton\n");
5499  MachineBasicBlock *MBB = MI.getParent();
5500  MachineFunction *MF = MBB->getParent();
5501  MachineRegisterInfo *MRI = &MF->getRegInfo();
5502  Register VReg = MI.getOperand(0).getReg();
5503  if (!Register::isVirtualRegister(VReg))
5504    return false;
5505
5506  MachineInstr *DefMI = MRI->getVRegDef(VReg);
5507
5508  // Look through COPY instructions to find definition.
5509  while (DefMI->isCopy()) {
5510    Register CopyVReg = DefMI->getOperand(1).getReg();
5511    if (!MRI->hasOneNonDBGUse(CopyVReg))
5512      return false;
5513    if (!MRI->hasOneDef(CopyVReg))
5514      return false;
5515    DefMI = MRI->getVRegDef(CopyVReg);
5516  }
5517
5518  switch (DefMI->getOpcode()) {
5519  default:
5520    return false;
5521  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
5522  case AArch64::ANDWri:
5523  case AArch64::ANDXri: {
5524    if (IsTestAndBranch)
5525      return false;
5526    if (DefMI->getParent() != MBB)
5527      return false;
5528    if (!MRI->hasOneNonDBGUse(VReg))
5529      return false;
5530
5531    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
5532    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
5533        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
5534    if (!isPowerOf2_64(Mask))
5535      return false;
5536
5537    MachineOperand &MO = DefMI->getOperand(1);
5538    Register NewReg = MO.getReg();
5539    if (!Register::isVirtualRegister(NewReg))
5540      return false;
5541
5542    assert(!MRI->def_empty(NewReg) && "Register must be defined.");
5543
5544    MachineBasicBlock &RefToMBB = *MBB;
5545    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
5546    DebugLoc DL = MI.getDebugLoc();
5547    unsigned Imm = Log2_64(Mask);
5548    unsigned Opc = (Imm < 32)
5549                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
5550                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
5551    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
5552                              .addReg(NewReg)
5553                              .addImm(Imm)
5554                              .addMBB(TBB);
5555    // Register lives on to the CBZ now.
5556    MO.setIsKill(false);
5557
5558    // For immediate smaller than 32, we need to use the 32-bit
5559    // variant (W) in all cases. Indeed the 64-bit variant does not
5560    // allow to encode them.
5561    // Therefore, if the input register is 64-bit, we need to take the
5562    // 32-bit sub-part.
5563    if (!Is32Bit && Imm < 32)
5564      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
5565    MI.eraseFromParent();
5566    return true;
5567  }
5568  // Look for CSINC
5569  case AArch64::CSINCWr:
5570  case AArch64::CSINCXr: {
5571    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
5572          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
5573        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
5574          DefMI->getOperand(2).getReg() == AArch64::XZR))
5575      return false;
5576
5577    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
5578      return false;
5579
5580    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
5581    // Convert only when the condition code is not modified between
5582    // the CSINC and the branch. The CC may be used by other
5583    // instructions in between.
5584    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
5585      return false;
5586    MachineBasicBlock &RefToMBB = *MBB;
5587    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
5588    DebugLoc DL = MI.getDebugLoc();
5589    if (IsNegativeBranch)
5590      CC = AArch64CC::getInvertedCondCode(CC);
5591    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
5592    MI.eraseFromParent();
5593    return true;
5594  }
5595  }
5596}
5597
5598std::pair<unsigned, unsigned>
5599AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5600  const unsigned Mask = AArch64II::MO_FRAGMENT;
5601  return std::make_pair(TF & Mask, TF & ~Mask);
5602}
5603
5604ArrayRef<std::pair<unsigned, const char *>>
5605AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5606  using namespace AArch64II;
5607
5608  static const std::pair<unsigned, const char *> TargetFlags[] = {
5609      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
5610      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
5611      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
5612      {MO_HI12, "aarch64-hi12"}};
5613  return makeArrayRef(TargetFlags);
5614}
5615
5616ArrayRef<std::pair<unsigned, const char *>>
5617AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5618  using namespace AArch64II;
5619
5620  static const std::pair<unsigned, const char *> TargetFlags[] = {
5621      {MO_COFFSTUB, "aarch64-coffstub"},
5622      {MO_GOT, "aarch64-got"},
5623      {MO_NC, "aarch64-nc"},
5624      {MO_S, "aarch64-s"},
5625      {MO_TLS, "aarch64-tls"},
5626      {MO_DLLIMPORT, "aarch64-dllimport"},
5627      {MO_PREL, "aarch64-prel"},
5628      {MO_TAGGED, "aarch64-tagged"}};
5629  return makeArrayRef(TargetFlags);
5630}
5631
5632ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
5633AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
5634  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
5635      {{MOSuppressPair, "aarch64-suppress-pair"},
5636       {MOStridedAccess, "aarch64-strided-access"}};
5637  return makeArrayRef(TargetFlags);
5638}
5639
5640/// Constants defining how certain sequences should be outlined.
5641/// This encompasses how an outlined function should be called, and what kind of
5642/// frame should be emitted for that outlined function.
5643///
5644/// \p MachineOutlinerDefault implies that the function should be called with
5645/// a save and restore of LR to the stack.
5646///
5647/// That is,
5648///
5649/// I1     Save LR                    OUTLINED_FUNCTION:
5650/// I2 --> BL OUTLINED_FUNCTION       I1
5651/// I3     Restore LR                 I2
5652///                                   I3
5653///                                   RET
5654///
5655/// * Call construction overhead: 3 (save + BL + restore)
5656/// * Frame construction overhead: 1 (ret)
5657/// * Requires stack fixups? Yes
5658///
5659/// \p MachineOutlinerTailCall implies that the function is being created from
5660/// a sequence of instructions ending in a return.
5661///
5662/// That is,
5663///
5664/// I1                             OUTLINED_FUNCTION:
5665/// I2 --> B OUTLINED_FUNCTION     I1
5666/// RET                            I2
5667///                                RET
5668///
5669/// * Call construction overhead: 1 (B)
5670/// * Frame construction overhead: 0 (Return included in sequence)
5671/// * Requires stack fixups? No
5672///
5673/// \p MachineOutlinerNoLRSave implies that the function should be called using
5674/// a BL instruction, but doesn't require LR to be saved and restored. This
5675/// happens when LR is known to be dead.
5676///
5677/// That is,
5678///
5679/// I1                                OUTLINED_FUNCTION:
5680/// I2 --> BL OUTLINED_FUNCTION       I1
5681/// I3                                I2
5682///                                   I3
5683///                                   RET
5684///
5685/// * Call construction overhead: 1 (BL)
5686/// * Frame construction overhead: 1 (RET)
5687/// * Requires stack fixups? No
5688///
5689/// \p MachineOutlinerThunk implies that the function is being created from
5690/// a sequence of instructions ending in a call. The outlined function is
5691/// called with a BL instruction, and the outlined function tail-calls the
5692/// original call destination.
5693///
5694/// That is,
5695///
5696/// I1                                OUTLINED_FUNCTION:
5697/// I2 --> BL OUTLINED_FUNCTION       I1
5698/// BL f                              I2
5699///                                   B f
5700/// * Call construction overhead: 1 (BL)
5701/// * Frame construction overhead: 0
5702/// * Requires stack fixups? No
5703///
5704/// \p MachineOutlinerRegSave implies that the function should be called with a
5705/// save and restore of LR to an available register. This allows us to avoid
5706/// stack fixups. Note that this outlining variant is compatible with the
5707/// NoLRSave case.
5708///
5709/// That is,
5710///
5711/// I1     Save LR                    OUTLINED_FUNCTION:
5712/// I2 --> BL OUTLINED_FUNCTION       I1
5713/// I3     Restore LR                 I2
5714///                                   I3
5715///                                   RET
5716///
5717/// * Call construction overhead: 3 (save + BL + restore)
5718/// * Frame construction overhead: 1 (ret)
5719/// * Requires stack fixups? No
5720enum MachineOutlinerClass {
5721  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
5722  MachineOutlinerTailCall, /// Only emit a branch.
5723  MachineOutlinerNoLRSave, /// Emit a call and return.
5724  MachineOutlinerThunk,    /// Emit a call and tail-call.
5725  MachineOutlinerRegSave   /// Same as default, but save to a register.
5726};
5727
5728enum MachineOutlinerMBBFlags {
5729  LRUnavailableSomewhere = 0x2,
5730  HasCalls = 0x4,
5731  UnsafeRegsDead = 0x8
5732};
5733
5734unsigned
5735AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5736  assert(C.LRUWasSet && "LRU wasn't set?");
5737  MachineFunction *MF = C.getMF();
5738  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5739      MF->getSubtarget().getRegisterInfo());
5740
5741  // Check if there is an available register across the sequence that we can
5742  // use.
5743  for (unsigned Reg : AArch64::GPR64RegClass) {
5744    if (!ARI->isReservedReg(*MF, Reg) &&
5745        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
5746        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5747        Reg != AArch64::X17 && // Ditto for X17.
5748        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
5749      return Reg;
5750  }
5751
5752  // No suitable register. Return 0.
5753  return 0u;
5754}
5755
5756static bool
5757outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
5758                                         const outliner::Candidate &b) {
5759  const Function &Fa = a.getMF()->getFunction();
5760  const Function &Fb = b.getMF()->getFunction();
5761
5762  // If none of the functions have the "sign-return-address" attribute their
5763  // signing behaviour is equal
5764  if (!Fa.hasFnAttribute("sign-return-address") &&
5765      !Fb.hasFnAttribute("sign-return-address")) {
5766    return true;
5767  }
5768
5769  // If both functions have the "sign-return-address" attribute their signing
5770  // behaviour is equal, if the values of the attributes are equal
5771  if (Fa.hasFnAttribute("sign-return-address") &&
5772      Fb.hasFnAttribute("sign-return-address")) {
5773    StringRef ScopeA =
5774        Fa.getFnAttribute("sign-return-address").getValueAsString();
5775    StringRef ScopeB =
5776        Fb.getFnAttribute("sign-return-address").getValueAsString();
5777    return ScopeA.equals(ScopeB);
5778  }
5779
5780  // If function B doesn't have the "sign-return-address" attribute but A does,
5781  // the functions' signing behaviour is equal if A's value for
5782  // "sign-return-address" is "none" and vice versa.
5783  if (Fa.hasFnAttribute("sign-return-address")) {
5784    StringRef ScopeA =
5785        Fa.getFnAttribute("sign-return-address").getValueAsString();
5786    return ScopeA.equals("none");
5787  }
5788
5789  if (Fb.hasFnAttribute("sign-return-address")) {
5790    StringRef ScopeB =
5791        Fb.getFnAttribute("sign-return-address").getValueAsString();
5792    return ScopeB.equals("none");
5793  }
5794
5795  llvm_unreachable("Unkown combination of sign-return-address attributes");
5796}
5797
5798static bool
5799outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
5800                                       const outliner::Candidate &b) {
5801  const Function &Fa = a.getMF()->getFunction();
5802  const Function &Fb = b.getMF()->getFunction();
5803
5804  // If none of the functions have the "sign-return-address-key" attribute
5805  // their keys are equal
5806  if (!Fa.hasFnAttribute("sign-return-address-key") &&
5807      !Fb.hasFnAttribute("sign-return-address-key")) {
5808    return true;
5809  }
5810
5811  // If both functions have the "sign-return-address-key" attribute their
5812  // keys are equal if the values of "sign-return-address-key" are equal
5813  if (Fa.hasFnAttribute("sign-return-address-key") &&
5814      Fb.hasFnAttribute("sign-return-address-key")) {
5815    StringRef KeyA =
5816        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5817    StringRef KeyB =
5818        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5819    return KeyA.equals(KeyB);
5820  }
5821
5822  // If B doesn't have the "sign-return-address-key" attribute, both keys are
5823  // equal, if function a has the default key (a_key)
5824  if (Fa.hasFnAttribute("sign-return-address-key")) {
5825    StringRef KeyA =
5826        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5827    return KeyA.equals_lower("a_key");
5828  }
5829
5830  if (Fb.hasFnAttribute("sign-return-address-key")) {
5831    StringRef KeyB =
5832        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5833    return KeyB.equals_lower("a_key");
5834  }
5835
5836  llvm_unreachable("Unkown combination of sign-return-address-key attributes");
5837}
5838
5839static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
5840                                                const outliner::Candidate &b) {
5841  const AArch64Subtarget &SubtargetA =
5842      a.getMF()->getSubtarget<AArch64Subtarget>();
5843  const AArch64Subtarget &SubtargetB =
5844      b.getMF()->getSubtarget<AArch64Subtarget>();
5845  return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
5846}
5847
5848outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
5849    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5850  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5851  unsigned SequenceSize =
5852      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
5853                      [this](unsigned Sum, const MachineInstr &MI) {
5854                        return Sum + getInstSizeInBytes(MI);
5855                      });
5856  unsigned NumBytesToCreateFrame = 0;
5857
5858  // We only allow outlining for functions having exactly matching return
5859  // address signing attributes, i.e., all share the same value for the
5860  // attribute "sign-return-address" and all share the same type of key they
5861  // are signed with.
5862  // Additionally we require all functions to simultaniously either support
5863  // v8.3a features or not. Otherwise an outlined function could get signed
5864  // using dedicated v8.3 instructions and a call from a function that doesn't
5865  // support v8.3 instructions would therefore be invalid.
5866  if (std::adjacent_find(
5867          RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5868          [](const outliner::Candidate &a, const outliner::Candidate &b) {
5869            // Return true if a and b are non-equal w.r.t. return address
5870            // signing or support of v8.3a features
5871            if (outliningCandidatesSigningScopeConsensus(a, b) &&
5872                outliningCandidatesSigningKeyConsensus(a, b) &&
5873                outliningCandidatesV8_3OpsConsensus(a, b)) {
5874              return false;
5875            }
5876            return true;
5877          }) != RepeatedSequenceLocs.end()) {
5878    return outliner::OutlinedFunction();
5879  }
5880
5881  // Since at this point all candidates agree on their return address signing
5882  // picking just one is fine. If the candidate functions potentially sign their
5883  // return addresses, the outlined function should do the same. Note that in
5884  // the case of "sign-return-address"="non-leaf" this is an assumption: It is
5885  // not certainly true that the outlined function will have to sign its return
5886  // address but this decision is made later, when the decision to outline
5887  // has already been made.
5888  // The same holds for the number of additional instructions we need: On
5889  // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
5890  // necessary. However, at this point we don't know if the outlined function
5891  // will have a RET instruction so we assume the worst.
5892  const Function &FCF = FirstCand.getMF()->getFunction();
5893  const TargetRegisterInfo &TRI = getRegisterInfo();
5894  if (FCF.hasFnAttribute("sign-return-address")) {
5895    // One PAC and one AUT instructions
5896    NumBytesToCreateFrame += 8;
5897
5898    // We have to check if sp modifying instructions would get outlined.
5899    // If so we only allow outlining if sp is unchanged overall, so matching
5900    // sub and add instructions are okay to outline, all other sp modifications
5901    // are not
5902    auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
5903      int SPValue = 0;
5904      MachineBasicBlock::iterator MBBI = C.front();
5905      for (;;) {
5906        if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
5907          switch (MBBI->getOpcode()) {
5908          case AArch64::ADDXri:
5909          case AArch64::ADDWri:
5910            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5911            assert(MBBI->getOperand(2).isImm() &&
5912                   "Expected operand to be immediate");
5913            assert(MBBI->getOperand(1).isReg() &&
5914                   "Expected operand to be a register");
5915            // Check if the add just increments sp. If so, we search for
5916            // matching sub instructions that decrement sp. If not, the
5917            // modification is illegal
5918            if (MBBI->getOperand(1).getReg() == AArch64::SP)
5919              SPValue += MBBI->getOperand(2).getImm();
5920            else
5921              return true;
5922            break;
5923          case AArch64::SUBXri:
5924          case AArch64::SUBWri:
5925            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5926            assert(MBBI->getOperand(2).isImm() &&
5927                   "Expected operand to be immediate");
5928            assert(MBBI->getOperand(1).isReg() &&
5929                   "Expected operand to be a register");
5930            // Check if the sub just decrements sp. If so, we search for
5931            // matching add instructions that increment sp. If not, the
5932            // modification is illegal
5933            if (MBBI->getOperand(1).getReg() == AArch64::SP)
5934              SPValue -= MBBI->getOperand(2).getImm();
5935            else
5936              return true;
5937            break;
5938          default:
5939            return true;
5940          }
5941        }
5942        if (MBBI == C.back())
5943          break;
5944        ++MBBI;
5945      }
5946      if (SPValue)
5947        return true;
5948      return false;
5949    };
5950    // Remove candidates with illegal stack modifying instructions
5951    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5952                                              RepeatedSequenceLocs.end(),
5953                                              hasIllegalSPModification),
5954                               RepeatedSequenceLocs.end());
5955
5956    // If the sequence doesn't have enough candidates left, then we're done.
5957    if (RepeatedSequenceLocs.size() < 2)
5958      return outliner::OutlinedFunction();
5959  }
5960
5961  // Properties about candidate MBBs that hold for all of them.
5962  unsigned FlagsSetInAll = 0xF;
5963
5964  // Compute liveness information for each candidate, and set FlagsSetInAll.
5965  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5966                [&FlagsSetInAll](outliner::Candidate &C) {
5967                  FlagsSetInAll &= C.Flags;
5968                });
5969
5970  // According to the AArch64 Procedure Call Standard, the following are
5971  // undefined on entry/exit from a function call:
5972  //
5973  // * Registers x16, x17, (and thus w16, w17)
5974  // * Condition codes (and thus the NZCV register)
5975  //
5976  // Because if this, we can't outline any sequence of instructions where
5977  // one
5978  // of these registers is live into/across it. Thus, we need to delete
5979  // those
5980  // candidates.
5981  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5982    // If the unsafe registers in this block are all dead, then we don't need
5983    // to compute liveness here.
5984    if (C.Flags & UnsafeRegsDead)
5985      return false;
5986    C.initLRU(TRI);
5987    LiveRegUnits LRU = C.LRU;
5988    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5989            !LRU.available(AArch64::NZCV));
5990  };
5991
5992  // Are there any candidates where those registers are live?
5993  if (!(FlagsSetInAll & UnsafeRegsDead)) {
5994    // Erase every candidate that violates the restrictions above. (It could be
5995    // true that we have viable candidates, so it's not worth bailing out in
5996    // the case that, say, 1 out of 20 candidates violate the restructions.)
5997    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5998                                              RepeatedSequenceLocs.end(),
5999                                              CantGuaranteeValueAcrossCall),
6000                               RepeatedSequenceLocs.end());
6001
6002    // If the sequence doesn't have enough candidates left, then we're done.
6003    if (RepeatedSequenceLocs.size() < 2)
6004      return outliner::OutlinedFunction();
6005  }
6006
6007  // At this point, we have only "safe" candidates to outline. Figure out
6008  // frame + call instruction information.
6009
6010  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
6011
6012  // Helper lambda which sets call information for every candidate.
6013  auto SetCandidateCallInfo =
6014      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
6015        for (outliner::Candidate &C : RepeatedSequenceLocs)
6016          C.setCallInfo(CallID, NumBytesForCall);
6017      };
6018
6019  unsigned FrameID = MachineOutlinerDefault;
6020  NumBytesToCreateFrame += 4;
6021
6022  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
6023    return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
6024  });
6025
6026  // We check to see if CFI Instructions are present, and if they are
6027  // we find the number of CFI Instructions in the candidates.
6028  unsigned CFICount = 0;
6029  MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
6030  for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
6031       Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
6032    const std::vector<MCCFIInstruction> &CFIInstructions =
6033        RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
6034    if (MBBI->isCFIInstruction()) {
6035      unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
6036      MCCFIInstruction CFI = CFIInstructions[CFIIndex];
6037      CFICount++;
6038    }
6039    MBBI++;
6040  }
6041
6042  // We compare the number of found CFI Instructions to  the number of CFI
6043  // instructions in the parent function for each candidate.  We must check this
6044  // since if we outline one of the CFI instructions in a function, we have to
6045  // outline them all for correctness. If we do not, the address offsets will be
6046  // incorrect between the two sections of the program.
6047  for (outliner::Candidate &C : RepeatedSequenceLocs) {
6048    std::vector<MCCFIInstruction> CFIInstructions =
6049        C.getMF()->getFrameInstructions();
6050
6051    if (CFICount > 0 && CFICount != CFIInstructions.size())
6052      return outliner::OutlinedFunction();
6053  }
6054
6055  // Returns true if an instructions is safe to fix up, false otherwise.
6056  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
6057    if (MI.isCall())
6058      return true;
6059
6060    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
6061        !MI.readsRegister(AArch64::SP, &TRI))
6062      return true;
6063
6064    // Any modification of SP will break our code to save/restore LR.
6065    // FIXME: We could handle some instructions which add a constant
6066    // offset to SP, with a bit more work.
6067    if (MI.modifiesRegister(AArch64::SP, &TRI))
6068      return false;
6069
6070    // At this point, we have a stack instruction that we might need to
6071    // fix up. We'll handle it if it's a load or store.
6072    if (MI.mayLoadOrStore()) {
6073      const MachineOperand *Base; // Filled with the base operand of MI.
6074      int64_t Offset;             // Filled with the offset of MI.
6075      bool OffsetIsScalable;
6076
6077      // Does it allow us to offset the base operand and is the base the
6078      // register SP?
6079      if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
6080          !Base->isReg() || Base->getReg() != AArch64::SP)
6081        return false;
6082
6083      // Fixe-up code below assumes bytes.
6084      if (OffsetIsScalable)
6085        return false;
6086
6087      // Find the minimum/maximum offset for this instruction and check
6088      // if fixing it up would be in range.
6089      int64_t MinOffset,
6090          MaxOffset;  // Unscaled offsets for the instruction.
6091      TypeSize Scale(0U, false); // The scale to multiply the offsets by.
6092      unsigned DummyWidth;
6093      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
6094
6095      Offset += 16; // Update the offset to what it would be if we outlined.
6096      if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
6097          Offset > MaxOffset * (int64_t)Scale.getFixedSize())
6098        return false;
6099
6100      // It's in range, so we can outline it.
6101      return true;
6102    }
6103
6104    // FIXME: Add handling for instructions like "add x0, sp, #8".
6105
6106    // We can't fix it up, so don't outline it.
6107    return false;
6108  };
6109
6110  // True if it's possible to fix up each stack instruction in this sequence.
6111  // Important for frames/call variants that modify the stack.
6112  bool AllStackInstrsSafe = std::all_of(
6113      FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
6114
6115  // If the last instruction in any candidate is a terminator, then we should
6116  // tail call all of the candidates.
6117  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
6118    FrameID = MachineOutlinerTailCall;
6119    NumBytesToCreateFrame = 0;
6120    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
6121  }
6122
6123  else if (LastInstrOpcode == AArch64::BL ||
6124           ((LastInstrOpcode == AArch64::BLR ||
6125             LastInstrOpcode == AArch64::BLRNoIP) &&
6126            !HasBTI)) {
6127    // FIXME: Do we need to check if the code after this uses the value of LR?
6128    FrameID = MachineOutlinerThunk;
6129    NumBytesToCreateFrame = 0;
6130    SetCandidateCallInfo(MachineOutlinerThunk, 4);
6131  }
6132
6133  else {
6134    // We need to decide how to emit calls + frames. We can always emit the same
6135    // frame if we don't need to save to the stack. If we have to save to the
6136    // stack, then we need a different frame.
6137    unsigned NumBytesNoStackCalls = 0;
6138    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6139
6140    // Check if we have to save LR.
6141    for (outliner::Candidate &C : RepeatedSequenceLocs) {
6142      C.initLRU(TRI);
6143
6144      // If we have a noreturn caller, then we're going to be conservative and
6145      // say that we have to save LR. If we don't have a ret at the end of the
6146      // block, then we can't reason about liveness accurately.
6147      //
6148      // FIXME: We can probably do better than always disabling this in
6149      // noreturn functions by fixing up the liveness info.
6150      bool IsNoReturn =
6151          C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
6152
6153      // Is LR available? If so, we don't need a save.
6154      if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
6155        NumBytesNoStackCalls += 4;
6156        C.setCallInfo(MachineOutlinerNoLRSave, 4);
6157        CandidatesWithoutStackFixups.push_back(C);
6158      }
6159
6160      // Is an unused register available? If so, we won't modify the stack, so
6161      // we can outline with the same frame type as those that don't save LR.
6162      else if (findRegisterToSaveLRTo(C)) {
6163        NumBytesNoStackCalls += 12;
6164        C.setCallInfo(MachineOutlinerRegSave, 12);
6165        CandidatesWithoutStackFixups.push_back(C);
6166      }
6167
6168      // Is SP used in the sequence at all? If not, we don't have to modify
6169      // the stack, so we are guaranteed to get the same frame.
6170      else if (C.UsedInSequence.available(AArch64::SP)) {
6171        NumBytesNoStackCalls += 12;
6172        C.setCallInfo(MachineOutlinerDefault, 12);
6173        CandidatesWithoutStackFixups.push_back(C);
6174      }
6175
6176      // If we outline this, we need to modify the stack. Pretend we don't
6177      // outline this by saving all of its bytes.
6178      else {
6179        NumBytesNoStackCalls += SequenceSize;
6180      }
6181    }
6182
6183    // If there are no places where we have to save LR, then note that we
6184    // don't have to update the stack. Otherwise, give every candidate the
6185    // default call type, as long as it's safe to do so.
6186    if (!AllStackInstrsSafe ||
6187        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
6188      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6189      FrameID = MachineOutlinerNoLRSave;
6190    } else {
6191      SetCandidateCallInfo(MachineOutlinerDefault, 12);
6192    }
6193
6194    // If we dropped all of the candidates, bail out here.
6195    if (RepeatedSequenceLocs.size() < 2) {
6196      RepeatedSequenceLocs.clear();
6197      return outliner::OutlinedFunction();
6198    }
6199  }
6200
6201  // Does every candidate's MBB contain a call? If so, then we might have a call
6202  // in the range.
6203  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6204    // Check if the range contains a call. These require a save + restore of the
6205    // link register.
6206    bool ModStackToSaveLR = false;
6207    if (std::any_of(FirstCand.front(), FirstCand.back(),
6208                    [](const MachineInstr &MI) { return MI.isCall(); }))
6209      ModStackToSaveLR = true;
6210
6211    // Handle the last instruction separately. If this is a tail call, then the
6212    // last instruction is a call. We don't want to save + restore in this case.
6213    // However, it could be possible that the last instruction is a call without
6214    // it being valid to tail call this sequence. We should consider this as
6215    // well.
6216    else if (FrameID != MachineOutlinerThunk &&
6217             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
6218      ModStackToSaveLR = true;
6219
6220    if (ModStackToSaveLR) {
6221      // We can't fix up the stack. Bail out.
6222      if (!AllStackInstrsSafe) {
6223        RepeatedSequenceLocs.clear();
6224        return outliner::OutlinedFunction();
6225      }
6226
6227      // Save + restore LR.
6228      NumBytesToCreateFrame += 8;
6229    }
6230  }
6231
6232  // If we have CFI instructions, we can only outline if the outlined section
6233  // can be a tail call
6234  if (FrameID != MachineOutlinerTailCall && CFICount > 0)
6235    return outliner::OutlinedFunction();
6236
6237  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
6238                                    NumBytesToCreateFrame, FrameID);
6239}
6240
6241bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
6242    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6243  const Function &F = MF.getFunction();
6244
6245  // Can F be deduplicated by the linker? If it can, don't outline from it.
6246  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6247    return false;
6248
6249  // Don't outline from functions with section markings; the program could
6250  // expect that all the code is in the named section.
6251  // FIXME: Allow outlining from multiple functions with the same section
6252  // marking.
6253  if (F.hasSection())
6254    return false;
6255
6256  // Outlining from functions with redzones is unsafe since the outliner may
6257  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
6258  // outline from it.
6259  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
6260  if (!AFI || AFI->hasRedZone().getValueOr(true))
6261    return false;
6262
6263  // FIXME: Teach the outliner to generate/handle Windows unwind info.
6264  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
6265    return false;
6266
6267  // It's safe to outline from MF.
6268  return true;
6269}
6270
6271bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6272                                              unsigned &Flags) const {
6273  // Check if LR is available through all of the MBB. If it's not, then set
6274  // a flag.
6275  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6276         "Suitable Machine Function for outlining must track liveness");
6277  LiveRegUnits LRU(getRegisterInfo());
6278
6279  std::for_each(MBB.rbegin(), MBB.rend(),
6280                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
6281
6282  // Check if each of the unsafe registers are available...
6283  bool W16AvailableInBlock = LRU.available(AArch64::W16);
6284  bool W17AvailableInBlock = LRU.available(AArch64::W17);
6285  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
6286
6287  // If all of these are dead (and not live out), we know we don't have to check
6288  // them later.
6289  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
6290    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6291
6292  // Now, add the live outs to the set.
6293  LRU.addLiveOuts(MBB);
6294
6295  // If any of these registers is available in the MBB, but also a live out of
6296  // the block, then we know outlining is unsafe.
6297  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
6298    return false;
6299  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
6300    return false;
6301  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
6302    return false;
6303
6304  // Check if there's a call inside this MachineBasicBlock. If there is, then
6305  // set a flag.
6306  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6307    Flags |= MachineOutlinerMBBFlags::HasCalls;
6308
6309  MachineFunction *MF = MBB.getParent();
6310
6311  // In the event that we outline, we may have to save LR. If there is an
6312  // available register in the MBB, then we'll always save LR there. Check if
6313  // this is true.
6314  bool CanSaveLR = false;
6315  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
6316      MF->getSubtarget().getRegisterInfo());
6317
6318  // Check if there is an available register across the sequence that we can
6319  // use.
6320  for (unsigned Reg : AArch64::GPR64RegClass) {
6321    if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
6322        Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
6323      CanSaveLR = true;
6324      break;
6325    }
6326  }
6327
6328  // Check if we have a register we can save LR to, and if LR was used
6329  // somewhere. If both of those things are true, then we need to evaluate the
6330  // safety of outlining stack instructions later.
6331  if (!CanSaveLR && !LRU.available(AArch64::LR))
6332    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6333
6334  return true;
6335}
6336
6337outliner::InstrType
6338AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
6339                                   unsigned Flags) const {
6340  MachineInstr &MI = *MIT;
6341  MachineBasicBlock *MBB = MI.getParent();
6342  MachineFunction *MF = MBB->getParent();
6343  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
6344
6345  // Don't outline anything used for return address signing. The outlined
6346  // function will get signed later if needed
6347  switch (MI.getOpcode()) {
6348  case AArch64::PACIASP:
6349  case AArch64::PACIBSP:
6350  case AArch64::AUTIASP:
6351  case AArch64::AUTIBSP:
6352  case AArch64::RETAA:
6353  case AArch64::RETAB:
6354  case AArch64::EMITBKEY:
6355    return outliner::InstrType::Illegal;
6356  }
6357
6358  // Don't outline LOHs.
6359  if (FuncInfo->getLOHRelated().count(&MI))
6360    return outliner::InstrType::Illegal;
6361
6362  // We can only outline these if we will tail call the outlined function, or
6363  // fix up the CFI offsets. Currently, CFI instructions are outlined only if
6364  // in a tail call.
6365  //
6366  // FIXME: If the proper fixups for the offset are implemented, this should be
6367  // possible.
6368  if (MI.isCFIInstruction())
6369    return outliner::InstrType::Legal;
6370
6371  // Don't allow debug values to impact outlining type.
6372  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
6373    return outliner::InstrType::Invisible;
6374
6375  // At this point, KILL instructions don't really tell us much so we can go
6376  // ahead and skip over them.
6377  if (MI.isKill())
6378    return outliner::InstrType::Invisible;
6379
6380  // Is this a terminator for a basic block?
6381  if (MI.isTerminator()) {
6382
6383    // Is this the end of a function?
6384    if (MI.getParent()->succ_empty())
6385      return outliner::InstrType::Legal;
6386
6387    // It's not, so don't outline it.
6388    return outliner::InstrType::Illegal;
6389  }
6390
6391  // Make sure none of the operands are un-outlinable.
6392  for (const MachineOperand &MOP : MI.operands()) {
6393    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
6394        MOP.isTargetIndex())
6395      return outliner::InstrType::Illegal;
6396
6397    // If it uses LR or W30 explicitly, then don't touch it.
6398    if (MOP.isReg() && !MOP.isImplicit() &&
6399        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
6400      return outliner::InstrType::Illegal;
6401  }
6402
6403  // Special cases for instructions that can always be outlined, but will fail
6404  // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
6405  // be outlined because they don't require a *specific* value to be in LR.
6406  if (MI.getOpcode() == AArch64::ADRP)
6407    return outliner::InstrType::Legal;
6408
6409  // If MI is a call we might be able to outline it. We don't want to outline
6410  // any calls that rely on the position of items on the stack. When we outline
6411  // something containing a call, we have to emit a save and restore of LR in
6412  // the outlined function. Currently, this always happens by saving LR to the
6413  // stack. Thus, if we outline, say, half the parameters for a function call
6414  // plus the call, then we'll break the callee's expectations for the layout
6415  // of the stack.
6416  //
6417  // FIXME: Allow calls to functions which construct a stack frame, as long
6418  // as they don't access arguments on the stack.
6419  // FIXME: Figure out some way to analyze functions defined in other modules.
6420  // We should be able to compute the memory usage based on the IR calling
6421  // convention, even if we can't see the definition.
6422  if (MI.isCall()) {
6423    // Get the function associated with the call. Look at each operand and find
6424    // the one that represents the callee and get its name.
6425    const Function *Callee = nullptr;
6426    for (const MachineOperand &MOP : MI.operands()) {
6427      if (MOP.isGlobal()) {
6428        Callee = dyn_cast<Function>(MOP.getGlobal());
6429        break;
6430      }
6431    }
6432
6433    // Never outline calls to mcount.  There isn't any rule that would require
6434    // this, but the Linux kernel's "ftrace" feature depends on it.
6435    if (Callee && Callee->getName() == "\01_mcount")
6436      return outliner::InstrType::Illegal;
6437
6438    // If we don't know anything about the callee, assume it depends on the
6439    // stack layout of the caller. In that case, it's only legal to outline
6440    // as a tail-call. Explicitly list the call instructions we know about so we
6441    // don't get unexpected results with call pseudo-instructions.
6442    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6443    if (MI.getOpcode() == AArch64::BLR ||
6444        MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
6445      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6446
6447    if (!Callee)
6448      return UnknownCallOutlineType;
6449
6450    // We have a function we have information about. Check it if it's something
6451    // can safely outline.
6452    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6453
6454    // We don't know what's going on with the callee at all. Don't touch it.
6455    if (!CalleeMF)
6456      return UnknownCallOutlineType;
6457
6458    // Check if we know anything about the callee saves on the function. If we
6459    // don't, then don't touch it, since that implies that we haven't
6460    // computed anything about its stack frame yet.
6461    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6462    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6463        MFI.getNumObjects() > 0)
6464      return UnknownCallOutlineType;
6465
6466    // At this point, we can say that CalleeMF ought to not pass anything on the
6467    // stack. Therefore, we can outline it.
6468    return outliner::InstrType::Legal;
6469  }
6470
6471  // Don't outline positions.
6472  if (MI.isPosition())
6473    return outliner::InstrType::Illegal;
6474
6475  // Don't touch the link register or W30.
6476  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
6477      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
6478    return outliner::InstrType::Illegal;
6479
6480  // Don't outline BTI instructions, because that will prevent the outlining
6481  // site from being indirectly callable.
6482  if (MI.getOpcode() == AArch64::HINT) {
6483    int64_t Imm = MI.getOperand(0).getImm();
6484    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
6485      return outliner::InstrType::Illegal;
6486  }
6487
6488  return outliner::InstrType::Legal;
6489}
6490
6491void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6492  for (MachineInstr &MI : MBB) {
6493    const MachineOperand *Base;
6494    unsigned Width;
6495    int64_t Offset;
6496    bool OffsetIsScalable;
6497
6498    // Is this a load or store with an immediate offset with SP as the base?
6499    if (!MI.mayLoadOrStore() ||
6500        !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
6501                                      &RI) ||
6502        (Base->isReg() && Base->getReg() != AArch64::SP))
6503      continue;
6504
6505    // It is, so we have to fix it up.
6506    TypeSize Scale(0U, false);
6507    int64_t Dummy1, Dummy2;
6508
6509    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
6510    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
6511    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
6512    assert(Scale != 0 && "Unexpected opcode!");
6513    assert(!OffsetIsScalable && "Expected offset to be a byte offset");
6514
6515    // We've pushed the return address to the stack, so add 16 to the offset.
6516    // This is safe, since we already checked if it would overflow when we
6517    // checked if this instruction was legal to outline.
6518    int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
6519    StackOffsetOperand.setImm(NewImm);
6520  }
6521}
6522
6523static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
6524                                 bool ShouldSignReturnAddr,
6525                                 bool ShouldSignReturnAddrWithAKey) {
6526  if (ShouldSignReturnAddr) {
6527    MachineBasicBlock::iterator MBBPAC = MBB.begin();
6528    MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
6529    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
6530    const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6531    DebugLoc DL;
6532
6533    if (MBBAUT != MBB.end())
6534      DL = MBBAUT->getDebugLoc();
6535
6536    // At the very beginning of the basic block we insert the following
6537    // depending on the key type
6538    //
6539    // a_key:                   b_key:
6540    //    PACIASP                   EMITBKEY
6541    //    CFI_INSTRUCTION           PACIBSP
6542    //                              CFI_INSTRUCTION
6543    if (ShouldSignReturnAddrWithAKey) {
6544      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
6545          .setMIFlag(MachineInstr::FrameSetup);
6546    } else {
6547      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
6548          .setMIFlag(MachineInstr::FrameSetup);
6549      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
6550          .setMIFlag(MachineInstr::FrameSetup);
6551    }
6552    unsigned CFIIndex =
6553        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
6554    BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
6555        .addCFIIndex(CFIIndex)
6556        .setMIFlags(MachineInstr::FrameSetup);
6557
6558    // If v8.3a features are available we can replace a RET instruction by
6559    // RETAA or RETAB and omit the AUT instructions
6560    if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
6561        MBBAUT->getOpcode() == AArch64::RET) {
6562      BuildMI(MBB, MBBAUT, DL,
6563              TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
6564                                                    : AArch64::RETAB))
6565          .copyImplicitOps(*MBBAUT);
6566      MBB.erase(MBBAUT);
6567    } else {
6568      BuildMI(MBB, MBBAUT, DL,
6569              TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
6570                                                    : AArch64::AUTIBSP))
6571          .setMIFlag(MachineInstr::FrameDestroy);
6572    }
6573  }
6574}
6575
6576void AArch64InstrInfo::buildOutlinedFrame(
6577    MachineBasicBlock &MBB, MachineFunction &MF,
6578    const outliner::OutlinedFunction &OF) const {
6579
6580  AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
6581
6582  if (OF.FrameConstructionID == MachineOutlinerTailCall)
6583    FI->setOutliningStyle("Tail Call");
6584  else if (OF.FrameConstructionID == MachineOutlinerThunk) {
6585    // For thunk outlining, rewrite the last instruction from a call to a
6586    // tail-call.
6587    MachineInstr *Call = &*--MBB.instr_end();
6588    unsigned TailOpcode;
6589    if (Call->getOpcode() == AArch64::BL) {
6590      TailOpcode = AArch64::TCRETURNdi;
6591    } else {
6592      assert(Call->getOpcode() == AArch64::BLR ||
6593             Call->getOpcode() == AArch64::BLRNoIP);
6594      TailOpcode = AArch64::TCRETURNriALL;
6595    }
6596    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
6597                           .add(Call->getOperand(0))
6598                           .addImm(0);
6599    MBB.insert(MBB.end(), TC);
6600    Call->eraseFromParent();
6601
6602    FI->setOutliningStyle("Thunk");
6603  }
6604
6605  bool IsLeafFunction = true;
6606
6607  // Is there a call in the outlined range?
6608  auto IsNonTailCall = [](const MachineInstr &MI) {
6609    return MI.isCall() && !MI.isReturn();
6610  };
6611
6612  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
6613    // Fix up the instructions in the range, since we're going to modify the
6614    // stack.
6615    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6616           "Can only fix up stack references once");
6617    fixupPostOutline(MBB);
6618
6619    IsLeafFunction = false;
6620
6621    // LR has to be a live in so that we can save it.
6622    if (!MBB.isLiveIn(AArch64::LR))
6623      MBB.addLiveIn(AArch64::LR);
6624
6625    MachineBasicBlock::iterator It = MBB.begin();
6626    MachineBasicBlock::iterator Et = MBB.end();
6627
6628    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6629        OF.FrameConstructionID == MachineOutlinerThunk)
6630      Et = std::prev(MBB.end());
6631
6632    // Insert a save before the outlined region
6633    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6634                                .addReg(AArch64::SP, RegState::Define)
6635                                .addReg(AArch64::LR)
6636                                .addReg(AArch64::SP)
6637                                .addImm(-16);
6638    It = MBB.insert(It, STRXpre);
6639
6640    const TargetSubtargetInfo &STI = MF.getSubtarget();
6641    const MCRegisterInfo *MRI = STI.getRegisterInfo();
6642    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
6643
6644    // Add a CFI saying the stack was moved 16 B down.
6645    int64_t StackPosEntry =
6646        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
6647    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6648        .addCFIIndex(StackPosEntry)
6649        .setMIFlags(MachineInstr::FrameSetup);
6650
6651    // Add a CFI saying that the LR that we want to find is now 16 B higher than
6652    // before.
6653    int64_t LRPosEntry =
6654        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
6655    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6656        .addCFIIndex(LRPosEntry)
6657        .setMIFlags(MachineInstr::FrameSetup);
6658
6659    // Insert a restore before the terminator for the function.
6660    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6661                                 .addReg(AArch64::SP, RegState::Define)
6662                                 .addReg(AArch64::LR, RegState::Define)
6663                                 .addReg(AArch64::SP)
6664                                 .addImm(16);
6665    Et = MBB.insert(Et, LDRXpost);
6666  }
6667
6668  // If a bunch of candidates reach this point they must agree on their return
6669  // address signing. It is therefore enough to just consider the signing
6670  // behaviour of one of them
6671  const Function &CF = OF.Candidates.front().getMF()->getFunction();
6672  bool ShouldSignReturnAddr = false;
6673  if (CF.hasFnAttribute("sign-return-address")) {
6674    StringRef Scope =
6675        CF.getFnAttribute("sign-return-address").getValueAsString();
6676    if (Scope.equals("all"))
6677      ShouldSignReturnAddr = true;
6678    else if (Scope.equals("non-leaf") && !IsLeafFunction)
6679      ShouldSignReturnAddr = true;
6680  }
6681
6682  // a_key is the default
6683  bool ShouldSignReturnAddrWithAKey = true;
6684  if (CF.hasFnAttribute("sign-return-address-key")) {
6685    const StringRef Key =
6686        CF.getFnAttribute("sign-return-address-key").getValueAsString();
6687    // Key can either be a_key or b_key
6688    assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
6689           "Return address signing key must be either a_key or b_key");
6690    ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
6691  }
6692
6693  // If this is a tail call outlined function, then there's already a return.
6694  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6695      OF.FrameConstructionID == MachineOutlinerThunk) {
6696    signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6697                         ShouldSignReturnAddrWithAKey);
6698    return;
6699  }
6700
6701  // It's not a tail call, so we have to insert the return ourselves.
6702
6703  // LR has to be a live in so that we can return to it.
6704  if (!MBB.isLiveIn(AArch64::LR))
6705    MBB.addLiveIn(AArch64::LR);
6706
6707  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
6708                          .addReg(AArch64::LR);
6709  MBB.insert(MBB.end(), ret);
6710
6711  signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6712                       ShouldSignReturnAddrWithAKey);
6713
6714  FI->setOutliningStyle("Function");
6715
6716  // Did we have to modify the stack by saving the link register?
6717  if (OF.FrameConstructionID != MachineOutlinerDefault)
6718    return;
6719
6720  // We modified the stack.
6721  // Walk over the basic block and fix up all the stack accesses.
6722  fixupPostOutline(MBB);
6723}
6724
6725MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
6726    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6727    MachineFunction &MF, const outliner::Candidate &C) const {
6728
6729  // Are we tail calling?
6730  if (C.CallConstructionID == MachineOutlinerTailCall) {
6731    // If yes, then we can just branch to the label.
6732    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
6733                            .addGlobalAddress(M.getNamedValue(MF.getName()))
6734                            .addImm(0));
6735    return It;
6736  }
6737
6738  // Are we saving the link register?
6739  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6740      C.CallConstructionID == MachineOutlinerThunk) {
6741    // No, so just insert the call.
6742    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6743                            .addGlobalAddress(M.getNamedValue(MF.getName())));
6744    return It;
6745  }
6746
6747  // We want to return the spot where we inserted the call.
6748  MachineBasicBlock::iterator CallPt;
6749
6750  // Instructions for saving and restoring LR around the call instruction we're
6751  // going to insert.
6752  MachineInstr *Save;
6753  MachineInstr *Restore;
6754  // Can we save to a register?
6755  if (C.CallConstructionID == MachineOutlinerRegSave) {
6756    // FIXME: This logic should be sunk into a target-specific interface so that
6757    // we don't have to recompute the register.
6758    unsigned Reg = findRegisterToSaveLRTo(C);
6759    assert(Reg != 0 && "No callee-saved register available?");
6760
6761    // Save and restore LR from that register.
6762    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
6763               .addReg(AArch64::XZR)
6764               .addReg(AArch64::LR)
6765               .addImm(0);
6766    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
6767                .addReg(AArch64::XZR)
6768                .addReg(Reg)
6769                .addImm(0);
6770  } else {
6771    // We have the default case. Save and restore from SP.
6772    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6773               .addReg(AArch64::SP, RegState::Define)
6774               .addReg(AArch64::LR)
6775               .addReg(AArch64::SP)
6776               .addImm(-16);
6777    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6778                  .addReg(AArch64::SP, RegState::Define)
6779                  .addReg(AArch64::LR, RegState::Define)
6780                  .addReg(AArch64::SP)
6781                  .addImm(16);
6782  }
6783
6784  It = MBB.insert(It, Save);
6785  It++;
6786
6787  // Insert the call.
6788  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6789                          .addGlobalAddress(M.getNamedValue(MF.getName())));
6790  CallPt = It;
6791  It++;
6792
6793  It = MBB.insert(It, Restore);
6794  return CallPt;
6795}
6796
6797bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
6798  MachineFunction &MF) const {
6799  return MF.getFunction().hasMinSize();
6800}
6801
6802Optional<DestSourcePair>
6803AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
6804
6805  // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
6806  // and zero immediate operands used as an alias for mov instruction.
6807  if (MI.getOpcode() == AArch64::ORRWrs &&
6808      MI.getOperand(1).getReg() == AArch64::WZR &&
6809      MI.getOperand(3).getImm() == 0x0) {
6810    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6811  }
6812
6813  if (MI.getOpcode() == AArch64::ORRXrs &&
6814      MI.getOperand(1).getReg() == AArch64::XZR &&
6815      MI.getOperand(3).getImm() == 0x0) {
6816    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6817  }
6818
6819  return None;
6820}
6821
6822Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
6823                                                      Register Reg) const {
6824  int Sign = 1;
6825  int64_t Offset = 0;
6826
6827  // TODO: Handle cases where Reg is a super- or sub-register of the
6828  // destination register.
6829  const MachineOperand &Op0 = MI.getOperand(0);
6830  if (!Op0.isReg() || Reg != Op0.getReg())
6831    return None;
6832
6833  switch (MI.getOpcode()) {
6834  default:
6835    return None;
6836  case AArch64::SUBWri:
6837  case AArch64::SUBXri:
6838  case AArch64::SUBSWri:
6839  case AArch64::SUBSXri:
6840    Sign *= -1;
6841    LLVM_FALLTHROUGH;
6842  case AArch64::ADDSWri:
6843  case AArch64::ADDSXri:
6844  case AArch64::ADDWri:
6845  case AArch64::ADDXri: {
6846    // TODO: Third operand can be global address (usually some string).
6847    if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
6848        !MI.getOperand(2).isImm())
6849      return None;
6850    Offset = MI.getOperand(2).getImm() * Sign;
6851    int Shift = MI.getOperand(3).getImm();
6852    assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
6853    Offset = Offset << Shift;
6854  }
6855  }
6856  return RegImmPair{MI.getOperand(1).getReg(), Offset};
6857}
6858
6859/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
6860/// the destination register then, if possible, describe the value in terms of
6861/// the source register.
6862static Optional<ParamLoadedValue>
6863describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
6864                       const TargetInstrInfo *TII,
6865                       const TargetRegisterInfo *TRI) {
6866  auto DestSrc = TII->isCopyInstr(MI);
6867  if (!DestSrc)
6868    return None;
6869
6870  Register DestReg = DestSrc->Destination->getReg();
6871  Register SrcReg = DestSrc->Source->getReg();
6872
6873  auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
6874
6875  // If the described register is the destination, just return the source.
6876  if (DestReg == DescribedReg)
6877    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6878
6879  // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
6880  if (MI.getOpcode() == AArch64::ORRWrs &&
6881      TRI->isSuperRegister(DestReg, DescribedReg))
6882    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6883
6884  // We may need to describe the lower part of a ORRXrs move.
6885  if (MI.getOpcode() == AArch64::ORRXrs &&
6886      TRI->isSubRegister(DestReg, DescribedReg)) {
6887    Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
6888    return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
6889  }
6890
6891  assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
6892         "Unhandled ORR[XW]rs copy case");
6893
6894  return None;
6895}
6896
6897Optional<ParamLoadedValue>
6898AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
6899                                      Register Reg) const {
6900  const MachineFunction *MF = MI.getMF();
6901  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6902  switch (MI.getOpcode()) {
6903  case AArch64::MOVZWi:
6904  case AArch64::MOVZXi: {
6905    // MOVZWi may be used for producing zero-extended 32-bit immediates in
6906    // 64-bit parameters, so we need to consider super-registers.
6907    if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
6908      return None;
6909
6910    if (!MI.getOperand(1).isImm())
6911      return None;
6912    int64_t Immediate = MI.getOperand(1).getImm();
6913    int Shift = MI.getOperand(2).getImm();
6914    return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
6915                            nullptr);
6916  }
6917  case AArch64::ORRWrs:
6918  case AArch64::ORRXrs:
6919    return describeORRLoadedValue(MI, Reg, this, TRI);
6920  }
6921
6922  return TargetInstrInfo::describeLoadedValue(MI, Reg);
6923}
6924
6925uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
6926  return get(Opc).TSFlags & AArch64::ElementSizeMask;
6927}
6928
6929unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
6930  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
6931    return AArch64::BLRNoIP;
6932  else
6933    return AArch64::BLR;
6934}
6935
6936#define GET_INSTRINFO_HELPERS
6937#define GET_INSTRMAP_INFO
6938#include "AArch64GenInstrInfo.inc"
6939