//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

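// The GET_GLOBALISEL_PREDICATE_BITSET fragment of the tblgen-erated include
// defines the PredicateBitset type used to gate imported patterns on
// subtarget features.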
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  void preISelLower(MachineInstr &I) const;

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register;
  // the new register is defined by the returned instruction.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using an FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
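  // For reference, in AArch64InstrInfo.td i32shift_a computes (32 - imm) % 32
  // (the UBFM "immr" field) and i32shift_b computes 31 - imm (the "imms"
  // field), so a 32-bit (shl x, imm) selects to UBFM x, immr, imms.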
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

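  // An AArch64 arithmetic immediate is a 12-bit value, optionally shifted
  // left by 12. E.g., 0x123000 is encodable as imm = 0x123 with shift = 12,
  // while 0x123001 cannot be encoded and must be materialized in a register.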
  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and
  /// whether or not a shift + extend should be folded into an addressing
  /// mode. Returns None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Instructions that accept extend modifiers like UXTW expect the register
  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
  /// subregister copy if necessary. Return either ExtReg, or the result of the
  /// new copy.
  Register narrowExtendRegIfNeeded(Register ExtReg,
                                   MachineIRBuilder &MIB) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

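// Pull in the tblgen-erated implementation of selectImpl() and the match
// tables it relies on.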
#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank and a size in bits, return the smallest register
/// class that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, Register SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple of helpers below, for making sure that the copy we produce is
  // valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something unexpected happens. The assert
  // invokes isValidCopy() so that the result is actually verified in debug
  // builds.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

    // If we're doing a cross-bank copy on different-sized registers, we need
    // to do a bit more work.
    if (SrcSize > DstSize) {
      // We're doing a cross-bank copy into a smaller register. We need a
      // subregister copy. First, get a register class that's on the same bank
      // as the destination, but the same size as the source.
      const TargetRegisterClass *SubregRC =
          getMinClassForRegBank(DstRegBank, SrcSize, true);
      assert(SubregRC && "Didn't get a register class for subreg?");

      // Get the appropriate subregister for the destination.
      unsigned SubReg = 0;
      if (!getSubRegForClass(DstRC, TRI, SubReg)) {
        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
        return false;
      }

      // Now, insert a subregister copy using the new register class.
      selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
      return CheckCopy();
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
          SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        Register PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit one of
  // its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
  return GenericOpc;
}

static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
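  // The table below is indexed as [ShouldUseImm][Is64Bit]; the "ri" variants
  // compare against an implicit +0.0 instead of a second register.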
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

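// Some FP predicates have no single AArch64 condition code and must be
// tested as a disjunction of two conditions, e.g. FCMP_ONE (ordered and not
// equal) is "MI or GT". CondCode2 carries the second condition and is set to
// AL when a single condition suffices.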
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
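  // CB(N)Z can only branch on a comparison against zero, so try to get a
  // constant onto the RHS: if the RHS isn't already a constant, swap the
  // operands and check again.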
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal)
    std::swap(RHS, LHS);

  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal || VRegAndVal->Value != 0) {
    MachineIRBuilder MIB(I);
    // If we can't select a CBZ then emit a cmp + Bcc.
    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                            CCMI->getOperand(1), MIB))
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
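/// E.g., a G_BUILD_VECTOR whose operands all look through to the same
/// constant value yields that value; anything else yields None.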
static Optional<int64_t> getVectorShiftImm(Register Reg,
                                           MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  assert(OpMI && "Expected to find a vreg def for vector shift operand");
  if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
    return None;

  // Check all operands are identical immediates.
  int64_t ImmVal = 0;
  for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
    auto VRegAndVal =
        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
    if (!VRegAndVal)
      return None;

    if (Idx == 1)
      ImmVal = VRegAndVal->Value;
    if (ImmVal != VRegAndVal->Value)
      return None;
  }

  return ImmVal;
}

/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                         MachineRegisterInfo &MRI) {
  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return None;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return None;
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return None;
  case 8:
    if (Imm > 7)
      return None;
    break;
  case 16:
    if (Imm > 15)
      return None;
    break;
  case 32:
    if (Imm > 31)
      return None;
    break;
  case 64:
    if (Imm > 63)
      return None;
    break;
  }
  return Imm;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction. Instead, the
  // signed shift-left-by-register instruction takes a signed shift amount,
  // where negative amounts specify a right shift.
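  // For example, a v4s32 G_ASHR by %amt is emitted as (sketch):
  //   %neg = NEGv4i32 %amt
  //   %res = SSHLv4i32 %src, %neg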
1149353358Sdim
1150353358Sdim  unsigned Opc = 0;
1151353358Sdim  unsigned NegOpc = 0;
1152353358Sdim  const TargetRegisterClass *RC = nullptr;
1153360784Sdim  if (Ty == LLT::vector(2, 64)) {
1154360784Sdim    Opc = AArch64::SSHLv2i64;
1155360784Sdim    NegOpc = AArch64::NEGv2i64;
1156360784Sdim    RC = &AArch64::FPR128RegClass;
1157360784Sdim  } else if (Ty == LLT::vector(4, 32)) {
1158353358Sdim    Opc = AArch64::SSHLv4i32;
1159353358Sdim    NegOpc = AArch64::NEGv4i32;
1160353358Sdim    RC = &AArch64::FPR128RegClass;
1161353358Sdim  } else if (Ty == LLT::vector(2, 32)) {
1162353358Sdim    Opc = AArch64::SSHLv2i32;
1163353358Sdim    NegOpc = AArch64::NEGv2i32;
1164353358Sdim    RC = &AArch64::FPR64RegClass;
1165353358Sdim  } else {
1166353358Sdim    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1167353358Sdim    return false;
1168353358Sdim  }
1169353358Sdim
1170353358Sdim  MachineIRBuilder MIB(I);
1171353358Sdim  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1172353358Sdim  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1173353358Sdim  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1174353358Sdim  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1175353358Sdim  I.eraseFromParent();
1176353358Sdim  return true;
1177353358Sdim}
1178353358Sdim
1179321369Sdimbool AArch64InstructionSelector::selectVaStartAAPCS(
1180321369Sdim    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1181321369Sdim  return false;
1182321369Sdim}
1183321369Sdim
1184321369Sdimbool AArch64InstructionSelector::selectVaStartDarwin(
1185321369Sdim    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1186321369Sdim  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1187353358Sdim  Register ListReg = I.getOperand(0).getReg();
1188321369Sdim
1189353358Sdim  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1190321369Sdim
1191321369Sdim  auto MIB =
1192321369Sdim      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1193321369Sdim          .addDef(ArgsAddrReg)
1194321369Sdim          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1195321369Sdim          .addImm(0)
1196321369Sdim          .addImm(0);
1197321369Sdim
1198321369Sdim  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1199321369Sdim
1200321369Sdim  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1201321369Sdim            .addUse(ArgsAddrReg)
1202321369Sdim            .addUse(ListReg)
1203321369Sdim            .addImm(0)
1204321369Sdim            .addMemOperand(*I.memoperands_begin());
1205321369Sdim
1206321369Sdim  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1207321369Sdim  I.eraseFromParent();
1208321369Sdim  return true;
1209321369Sdim}
1210321369Sdim
1211341825Sdimvoid AArch64InstructionSelector::materializeLargeCMVal(
1212360784Sdim    MachineInstr &I, const Value *V, unsigned OpFlags) const {
1213341825Sdim  MachineBasicBlock &MBB = *I.getParent();
1214341825Sdim  MachineFunction &MF = *MBB.getParent();
1215341825Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1216341825Sdim  MachineIRBuilder MIB(I);
1217341825Sdim
1218344779Sdim  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1219341825Sdim  MovZ->addOperand(MF, I.getOperand(1));
1220341825Sdim  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1221341825Sdim                                     AArch64II::MO_NC);
1222341825Sdim  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1223341825Sdim  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1224341825Sdim
1225353358Sdim  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1226353358Sdim                       Register ForceDstReg) {
1227353358Sdim    Register DstReg = ForceDstReg
1228341825Sdim                          ? ForceDstReg
1229341825Sdim                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1230341825Sdim    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1231341825Sdim    if (auto *GV = dyn_cast<GlobalValue>(V)) {
1232341825Sdim      MovI->addOperand(MF, MachineOperand::CreateGA(
1233341825Sdim                               GV, MovZ->getOperand(1).getOffset(), Flags));
1234341825Sdim    } else {
1235341825Sdim      MovI->addOperand(
1236341825Sdim          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1237341825Sdim                                       MovZ->getOperand(1).getOffset(), Flags));
1238341825Sdim    }
1239341825Sdim    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1240341825Sdim    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1241341825Sdim    return DstReg;
1242341825Sdim  };
1243353358Sdim  Register DstReg = BuildMovK(MovZ.getReg(0),
1244341825Sdim                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1245341825Sdim  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1246341825Sdim  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1247341825Sdim  return;
1248341825Sdim}
1249341825Sdim
1250353358Sdimvoid AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1251353358Sdim  MachineBasicBlock &MBB = *I.getParent();
1252353358Sdim  MachineFunction &MF = *MBB.getParent();
1253353358Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1254353358Sdim
1255353358Sdim  switch (I.getOpcode()) {
1256353358Sdim  case TargetOpcode::G_SHL:
1257353358Sdim  case TargetOpcode::G_ASHR:
1258353358Sdim  case TargetOpcode::G_LSHR: {
1259353358Sdim    // These shifts are legalized to have 64 bit shift amounts because we want
1260353358Sdim    // to take advantage of the existing imported selection patterns that assume
1261353358Sdim    // the immediates are s64s. However, if the shifted type is 32 bits and for
1262353358Sdim    // some reason we receive input GMIR that has an s64 shift amount that's not
1263353358Sdim    // a G_CONSTANT, insert a truncate so that we can still select the s32
1264353358Sdim    // register-register variant.
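    // For example (sketch):
    //   %res:gpr(s32) = G_SHL %x:gpr(s32), %amt:gpr(s64)
    // becomes, after the subregister copy inserted below:
    //   %trunc:gpr(s32) = COPY %amt.sub_32
    //   %res:gpr(s32) = G_SHL %x:gpr(s32), %trunc:gpr(s32)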
1265360784Sdim    Register SrcReg = I.getOperand(1).getReg();
1266360784Sdim    Register ShiftReg = I.getOperand(2).getReg();
1267353358Sdim    const LLT ShiftTy = MRI.getType(ShiftReg);
1268353358Sdim    const LLT SrcTy = MRI.getType(SrcReg);
1269353358Sdim    if (SrcTy.isVector())
1270353358Sdim      return;
1271353358Sdim    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1272353358Sdim    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1273353358Sdim      return;
1274353358Sdim    auto *AmtMI = MRI.getVRegDef(ShiftReg);
1275353358Sdim    assert(AmtMI && "could not find a vreg definition for shift amount");
1276353358Sdim    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1277353358Sdim      // Insert a subregister copy to implement a 64->32 trunc
1278353358Sdim      MachineIRBuilder MIB(I);
1279353358Sdim      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1280353358Sdim                       .addReg(ShiftReg, 0, AArch64::sub_32);
1281353358Sdim      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1282353358Sdim      I.getOperand(2).setReg(Trunc.getReg(0));
1283353358Sdim    }
1284353358Sdim    return;
1285353358Sdim  }
1286360784Sdim  case TargetOpcode::G_STORE:
1287360784Sdim    contractCrossBankCopyIntoStore(I, MRI);
1288360784Sdim    return;
1289353358Sdim  default:
1290353358Sdim    return;
1291353358Sdim  }
1292353358Sdim}
1293353358Sdim
1294353358Sdimbool AArch64InstructionSelector::earlySelectSHL(
1295353358Sdim    MachineInstr &I, MachineRegisterInfo &MRI) const {
1296353358Sdim  // We try to match the immediate variant of LSL, which is actually an alias
1297353358Sdim  // for a special case of UBFM. Otherwise, we fall back to the imported
1298353358Sdim  // selector which will match the register variant.
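  // For example, "lsl x0, x1, #3" is an alias for "ubfm x0, x1, #61, #60",
  // with immr = (64 - 3) % 64 and imms = 63 - 3.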
1299353358Sdim  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1300353358Sdim  const auto &MO = I.getOperand(2);
1301353358Sdim  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1302353358Sdim  if (!VRegAndVal)
1303353358Sdim    return false;
1304353358Sdim
1305353358Sdim  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1306353358Sdim  if (DstTy.isVector())
1307353358Sdim    return false;
1308353358Sdim  bool Is64Bit = DstTy.getSizeInBits() == 64;
1309353358Sdim  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1310353358Sdim  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1311353358Sdim  MachineIRBuilder MIB(I);
1312353358Sdim
1313353358Sdim  if (!Imm1Fn || !Imm2Fn)
1314353358Sdim    return false;
1315353358Sdim
1316353358Sdim  auto NewI =
1317353358Sdim      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1318353358Sdim                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1319353358Sdim
1320353358Sdim  for (auto &RenderFn : *Imm1Fn)
1321353358Sdim    RenderFn(NewI);
1322353358Sdim  for (auto &RenderFn : *Imm2Fn)
1323353358Sdim    RenderFn(NewI);
1324353358Sdim
1325353358Sdim  I.eraseFromParent();
1326353358Sdim  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1327353358Sdim}
1328353358Sdim
1329360784Sdimvoid AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1330360784Sdim    MachineInstr &I, MachineRegisterInfo &MRI) const {
1331360784Sdim  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1332360784Sdim  // If we're storing a scalar, it doesn't matter what register bank that
1333360784Sdim  // scalar is on. All that matters is the size.
1334360784Sdim  //
1335360784Sdim  // So, if we see something like this (with a 32-bit scalar as an example):
1336360784Sdim  //
1337360784Sdim  // %x:gpr(s32) = ... something ...
1338360784Sdim  // %y:fpr(s32) = COPY %x:gpr(s32)
1339360784Sdim  // G_STORE %y:fpr(s32)
1340360784Sdim  //
1341360784Sdim  // We can fix this up into something like this:
1342360784Sdim  //
1343360784Sdim  // G_STORE %x:gpr(s32)
1344360784Sdim  //
1345360784Sdim  // And then continue the selection process normally.
1346360784Sdim  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1347360784Sdim  if (!Def)
1348360784Sdim    return;
1349360784Sdim  Register DefDstReg = Def->getOperand(0).getReg();
1350360784Sdim  LLT DefDstTy = MRI.getType(DefDstReg);
1351360784Sdim  Register StoreSrcReg = I.getOperand(0).getReg();
1352360784Sdim  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1353360784Sdim
1354360784Sdim  // If we get something strange like a physical register, then we shouldn't
1355360784Sdim  // go any further.
1356360784Sdim  if (!DefDstTy.isValid())
1357360784Sdim    return;
1358360784Sdim
1359360784Sdim  // Are the source and dst types the same size?
1360360784Sdim  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1361360784Sdim    return;
1362360784Sdim
1363360784Sdim  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1364360784Sdim      RBI.getRegBank(DefDstReg, MRI, TRI))
1365360784Sdim    return;
1366360784Sdim
1367360784Sdim  // We have a cross-bank copy, which is entering a store. Let's fold it.
1368360784Sdim  I.getOperand(0).setReg(DefDstReg);
1369360784Sdim}
1370360784Sdim
1371353358Sdimbool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1372353358Sdim  assert(I.getParent() && "Instruction should be in a basic block!");
1373353358Sdim  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1374353358Sdim
1375353358Sdim  MachineBasicBlock &MBB = *I.getParent();
1376353358Sdim  MachineFunction &MF = *MBB.getParent();
1377353358Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1378353358Sdim
1379353358Sdim  switch (I.getOpcode()) {
1380353358Sdim  case TargetOpcode::G_SHL:
1381353358Sdim    return earlySelectSHL(I, MRI);
1382360784Sdim  case TargetOpcode::G_CONSTANT: {
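    // A zero G_CONSTANT of s32/s64 can be selected as a plain COPY from the
    // zero register (WZR/XZR) instead of materializing it with MOVi32imm or
    // MOVi64imm.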
1383360784Sdim    bool IsZero = false;
1384360784Sdim    if (I.getOperand(1).isCImm())
1385360784Sdim      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1386360784Sdim    else if (I.getOperand(1).isImm())
1387360784Sdim      IsZero = I.getOperand(1).getImm() == 0;
1388360784Sdim
1389360784Sdim    if (!IsZero)
1390360784Sdim      return false;
1391360784Sdim
1392360784Sdim    Register DefReg = I.getOperand(0).getReg();
1393360784Sdim    LLT Ty = MRI.getType(DefReg);
1394360784Sdim    if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1395360784Sdim      return false;
1396360784Sdim
1397360784Sdim    if (Ty == LLT::scalar(64)) {
1398360784Sdim      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1399360784Sdim      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1400360784Sdim    } else {
1401360784Sdim      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1402360784Sdim      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1403360784Sdim    }
1404360784Sdim    I.setDesc(TII.get(TargetOpcode::COPY));
1405360784Sdim    return true;
1406360784Sdim  }
1407353358Sdim  default:
1408353358Sdim    return false;
1409353358Sdim  }
1410353358Sdim}
1411353358Sdim
1412360784Sdimbool AArch64InstructionSelector::select(MachineInstr &I) {
1413311116Sdim  assert(I.getParent() && "Instruction should be in a basic block!");
1414311116Sdim  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1415311116Sdim
1416311116Sdim  MachineBasicBlock &MBB = *I.getParent();
1417311116Sdim  MachineFunction &MF = *MBB.getParent();
1418311116Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1419311116Sdim
1420311116Sdim  unsigned Opcode = I.getOpcode();
  // G_PHI requires the same handling as PHI.
1422327952Sdim  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1423311116Sdim    // Certain non-generic instructions also need some special handling.
1424311116Sdim
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1426311116Sdim      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1427311116Sdim
1428327952Sdim    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1429353358Sdim      const Register DefReg = I.getOperand(0).getReg();
1430311116Sdim      const LLT DefTy = MRI.getType(DefReg);
1431311116Sdim
1432353358Sdim      const RegClassOrRegBank &RegClassOrBank =
1433353358Sdim        MRI.getRegClassOrRegBank(DefReg);
1434311116Sdim
1435353358Sdim      const TargetRegisterClass *DefRC
1436353358Sdim        = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1437353358Sdim      if (!DefRC) {
1438353358Sdim        if (!DefTy.isValid()) {
1439353358Sdim          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1440353358Sdim          return false;
1441353358Sdim        }
1442353358Sdim        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1443353358Sdim        DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1444311116Sdim        if (!DefRC) {
1445353358Sdim          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1446353358Sdim          return false;
1447311116Sdim        }
1448311116Sdim      }
1449353358Sdim
1450327952Sdim      I.setDesc(TII.get(TargetOpcode::PHI));
1451311116Sdim
1452311116Sdim      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1453311116Sdim    }
1454311116Sdim
1455311116Sdim    if (I.isCopy())
1456311116Sdim      return selectCopy(I, TII, MRI, TRI, RBI);
1457311116Sdim
1458311116Sdim    return true;
1459311116Sdim  }
1460311116Sdim
1461311116Sdim
1462311116Sdim  if (I.getNumOperands() != I.getNumExplicitOperands()) {
1463341825Sdim    LLVM_DEBUG(
1464341825Sdim        dbgs() << "Generic instruction has unexpected implicit operands\n");
1465311116Sdim    return false;
1466311116Sdim  }
1467311116Sdim
1468353358Sdim  // Try to do some lowering before we start instruction selecting. These
1469353358Sdim  // lowerings are purely transformations on the input G_MIR and so selection
1470353358Sdim  // must continue after any modification of the instruction.
1471353358Sdim  preISelLower(I);
1472353358Sdim
  // There may be patterns that the importer can't deal with optimally, but
  // that it nonetheless selects to a suboptimal sequence, so our custom C++
  // selection code later never has a chance to work on them. Therefore, we
  // have an early selection attempt here to give priority to certain
  // selection routines over the imported ones.
1478353358Sdim  if (earlySelect(I))
1479353358Sdim    return true;
1480353358Sdim
1481360784Sdim  if (selectImpl(I, *CoverageInfo))
1482311116Sdim    return true;
1483311116Sdim
1484311116Sdim  LLT Ty =
1485311116Sdim      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1486311116Sdim
1487353358Sdim  MachineIRBuilder MIB(I);
1488353358Sdim
1489311116Sdim  switch (Opcode) {
1490311116Sdim  case TargetOpcode::G_BRCOND: {
1491311116Sdim    if (Ty.getSizeInBits() > 32) {
1492311116Sdim      // We shouldn't need this on AArch64, but it would be implemented as an
1493311116Sdim      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1494311116Sdim      // bit being tested is < 32.
1495341825Sdim      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32 bits");
1497311116Sdim      return false;
1498311116Sdim    }
1499311116Sdim
1500353358Sdim    const Register CondReg = I.getOperand(0).getReg();
1501311116Sdim    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1502311116Sdim
1503344779Sdim    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1504344779Sdim    // instructions will not be produced, as they are conditional branch
1505344779Sdim    // instructions that do not set flags.
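    // E.g. for "G_BRCOND %cond(s1), %bb" we normally emit "TBNZ %cond, #0,
    // %bb"; under SLH we emit "ANDS wzr, %cond, #1" followed by "B.ne %bb"
    // instead (a sketch of the code below).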
1508344779Sdim    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1509321369Sdim      return true;
1510321369Sdim
1511344779Sdim    if (ProduceNonFlagSettingCondBr) {
1512344779Sdim      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1513344779Sdim                     .addUse(CondReg)
1514344779Sdim                     .addImm(/*bit offset=*/0)
1515344779Sdim                     .addMBB(DestMBB);
1516311116Sdim
1517344779Sdim      I.eraseFromParent();
1518344779Sdim      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1519344779Sdim    } else {
1520344779Sdim      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1521344779Sdim                     .addDef(AArch64::WZR)
1522344779Sdim                     .addUse(CondReg)
                     .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1524344779Sdim      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1525344779Sdim      auto Bcc =
1526344779Sdim          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::NE)
1528344779Sdim              .addMBB(DestMBB);
1529344779Sdim
1530344779Sdim      I.eraseFromParent();
1531344779Sdim      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1532344779Sdim    }
1533311116Sdim  }
1534311116Sdim
1535321369Sdim  case TargetOpcode::G_BRINDIRECT: {
1536321369Sdim    I.setDesc(TII.get(AArch64::BR));
1537321369Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1538321369Sdim  }
1539321369Sdim
1540353358Sdim  case TargetOpcode::G_BRJT:
1541353358Sdim    return selectBrJT(I, MRI);
1542353358Sdim
1543353358Sdim  case TargetOpcode::G_BSWAP: {
1544353358Sdim    // Handle vector types for G_BSWAP directly.
1545353358Sdim    Register DstReg = I.getOperand(0).getReg();
1546353358Sdim    LLT DstTy = MRI.getType(DstReg);
1547353358Sdim
1548353358Sdim    // We should only get vector types here; everything else is handled by the
1549353358Sdim    // importer right now.
1550353358Sdim    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1551353358Sdim      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1552353358Sdim      return false;
1553353358Sdim    }
1554353358Sdim
1555353358Sdim    // Only handle 4 and 2 element vectors for now.
1556353358Sdim    // TODO: 16-bit elements.
1557353358Sdim    unsigned NumElts = DstTy.getNumElements();
1558353358Sdim    if (NumElts != 4 && NumElts != 2) {
1559353358Sdim      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1560353358Sdim      return false;
1561353358Sdim    }
1562353358Sdim
1563353358Sdim    // Choose the correct opcode for the supported types. Right now, that's
1564353358Sdim    // v2s32, v4s32, and v2s64.
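    // E.g. a v4s32 bswap is implemented as REV32v16i8, a byte reversal within
    // each 32-bit element of the 128-bit register.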
1565353358Sdim    unsigned Opc = 0;
1566353358Sdim    unsigned EltSize = DstTy.getElementType().getSizeInBits();
1567353358Sdim    if (EltSize == 32)
1568353358Sdim      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1569353358Sdim                                          : AArch64::REV32v16i8;
1570353358Sdim    else if (EltSize == 64)
1571353358Sdim      Opc = AArch64::REV64v16i8;
1572353358Sdim
1573353358Sdim    // We should always get something by the time we get here...
1574353358Sdim    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1575353358Sdim
1576353358Sdim    I.setDesc(TII.get(Opc));
1577353358Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1578353358Sdim  }
1579353358Sdim
1580311116Sdim  case TargetOpcode::G_FCONSTANT:
1581311116Sdim  case TargetOpcode::G_CONSTANT: {
1582311116Sdim    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1583311116Sdim
1584353358Sdim    const LLT s8 = LLT::scalar(8);
1585353358Sdim    const LLT s16 = LLT::scalar(16);
1586311116Sdim    const LLT s32 = LLT::scalar(32);
1587311116Sdim    const LLT s64 = LLT::scalar(64);
1588311116Sdim    const LLT p0 = LLT::pointer(0, 64);
1589311116Sdim
1590353358Sdim    const Register DefReg = I.getOperand(0).getReg();
1591311116Sdim    const LLT DefTy = MRI.getType(DefReg);
1592311116Sdim    const unsigned DefSize = DefTy.getSizeInBits();
1593311116Sdim    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1594311116Sdim
1595311116Sdim    // FIXME: Redundant check, but even less readable when factored out.
1596311116Sdim    if (isFP) {
1597311116Sdim      if (Ty != s32 && Ty != s64) {
1598341825Sdim        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1599341825Sdim                          << " constant, expected: " << s32 << " or " << s64
1600341825Sdim                          << '\n');
1601311116Sdim        return false;
1602311116Sdim      }
1603311116Sdim
1604311116Sdim      if (RB.getID() != AArch64::FPRRegBankID) {
1605341825Sdim        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1606341825Sdim                          << " constant on bank: " << RB
1607341825Sdim                          << ", expected: FPR\n");
1608311116Sdim        return false;
1609311116Sdim      }
1610327952Sdim
1611327952Sdim      // The case when we have 0.0 is covered by tablegen. Reject it here so we
1612327952Sdim      // can be sure tablegen works correctly and isn't rescued by this code.
1613327952Sdim      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1614327952Sdim        return false;
1615311116Sdim    } else {
1616327952Sdim      // s32 and s64 are covered by tablegen.
1617353358Sdim      if (Ty != p0 && Ty != s8 && Ty != s16) {
1618341825Sdim        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s8 << ", " << s16
                          << ", or " << p0 << '\n');
1621311116Sdim        return false;
1622311116Sdim      }
1623311116Sdim
1624311116Sdim      if (RB.getID() != AArch64::GPRRegBankID) {
1625341825Sdim        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1626341825Sdim                          << " constant on bank: " << RB
1627341825Sdim                          << ", expected: GPR\n");
1628311116Sdim        return false;
1629311116Sdim      }
1630311116Sdim    }
1631311116Sdim
1632353358Sdim    // We allow G_CONSTANT of types < 32b.
1633311116Sdim    const unsigned MovOpc =
1634353358Sdim        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1635311116Sdim
1636311116Sdim    if (isFP) {
      // Either emit an FMOV, or emit a copy followed by a normal MOV.
1638311116Sdim      const TargetRegisterClass &GPRRC =
1639311116Sdim          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1640311116Sdim      const TargetRegisterClass &FPRRC =
1641311116Sdim          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1642311116Sdim
      // Can we use an FMOV instruction to represent the immediate?
1644353358Sdim      if (emitFMovForFConstant(I, MRI))
1645353358Sdim        return true;
1646353358Sdim
1647353358Sdim      // Nope. Emit a copy and use a normal mov instead.
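      // That is (sketch), for a 32-bit FP constant:
      //   %tmp:gpr32 = MOVi32imm <bit pattern of the FP immediate>
      //   %def:fpr32 = COPY %tmp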
1648353358Sdim      const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1649311116Sdim      MachineOperand &RegOp = I.getOperand(0);
1650311116Sdim      RegOp.setReg(DefGPRReg);
1651353358Sdim      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1652353358Sdim      MIB.buildCopy({DefReg}, {DefGPRReg});
1653311116Sdim
1654311116Sdim      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1655341825Sdim        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1656311116Sdim        return false;
1657311116Sdim      }
1658311116Sdim
1659311116Sdim      MachineOperand &ImmOp = I.getOperand(1);
1660311116Sdim      // FIXME: Is going through int64_t always correct?
1661311116Sdim      ImmOp.ChangeToImmediate(
1662311116Sdim          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1663321369Sdim    } else if (I.getOperand(1).isCImm()) {
1664311116Sdim      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1665311116Sdim      I.getOperand(1).ChangeToImmediate(Val);
1666321369Sdim    } else if (I.getOperand(1).isImm()) {
1667321369Sdim      uint64_t Val = I.getOperand(1).getImm();
1668321369Sdim      I.getOperand(1).ChangeToImmediate(Val);
1669311116Sdim    }
1670311116Sdim
1671353358Sdim    I.setDesc(TII.get(MovOpc));
1672311116Sdim    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1673311116Sdim    return true;
1674311116Sdim  }
1675327952Sdim  case TargetOpcode::G_EXTRACT: {
1676360784Sdim    Register DstReg = I.getOperand(0).getReg();
1677360784Sdim    Register SrcReg = I.getOperand(1).getReg();
1678360784Sdim    LLT SrcTy = MRI.getType(SrcReg);
1679360784Sdim    LLT DstTy = MRI.getType(DstReg);
1680330384Sdim    (void)DstTy;
1681329983Sdim    unsigned SrcSize = SrcTy.getSizeInBits();
1682311116Sdim
1683360784Sdim    if (SrcTy.getSizeInBits() > 64) {
1684360784Sdim      // This should be an extract of an s128, which is like a vector extract.
1685360784Sdim      if (SrcTy.getSizeInBits() != 128)
1686360784Sdim        return false;
1687360784Sdim      // Only support extracting 64 bits from an s128 at the moment.
1688360784Sdim      if (DstTy.getSizeInBits() != 64)
1689360784Sdim        return false;
1690360784Sdim
1691360784Sdim      const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1692360784Sdim      const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
      // Check that we always have the right regbank.
1694360784Sdim      assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1695360784Sdim             DstRB.getID() == AArch64::FPRRegBankID &&
1696360784Sdim             "Wrong extract regbank!");
1697360784Sdim      (void)SrcRB;
1698360784Sdim
1699360784Sdim      // Emit the same code as a vector extract.
1700360784Sdim      // Offset must be a multiple of 64.
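      // E.g. "G_EXTRACT %x(s128), 64" extracts lane 1 of the value viewed as
      // <2 x s64>, just as a G_EXTRACT_VECTOR_ELT would.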
1701360784Sdim      unsigned Offset = I.getOperand(2).getImm();
1702360784Sdim      if (Offset % 64 != 0)
1703360784Sdim        return false;
1704360784Sdim      unsigned LaneIdx = Offset / 64;
1705360784Sdim      MachineIRBuilder MIB(I);
1706360784Sdim      MachineInstr *Extract = emitExtractVectorElt(
1707360784Sdim          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1708360784Sdim      if (!Extract)
1709360784Sdim        return false;
1710360784Sdim      I.eraseFromParent();
1711360784Sdim      return true;
1712360784Sdim    }
1713360784Sdim
1714329983Sdim    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1715327952Sdim    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1716327952Sdim                                      Ty.getSizeInBits() - 1);
1717327952Sdim
1718329983Sdim    if (SrcSize < 64) {
1719329983Sdim      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1720329983Sdim             "unexpected G_EXTRACT types");
1721329983Sdim      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1722329983Sdim    }
1723329983Sdim
1724360784Sdim    DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1725353358Sdim    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1726353358Sdim    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1727353358Sdim        .addReg(DstReg, 0, AArch64::sub_32);
1728327952Sdim    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1729327952Sdim                                 AArch64::GPR32RegClass, MRI);
1730327952Sdim    I.getOperand(0).setReg(DstReg);
1731327952Sdim
1732327952Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1733327952Sdim  }
1734327952Sdim
1735327952Sdim  case TargetOpcode::G_INSERT: {
1736327952Sdim    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1737329983Sdim    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1738329983Sdim    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors; same-size ones should have become something
    // else by now (split up or turned into COPYs).
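    // E.g. (sketch) inserting an s16 into an s32 at bit 16 becomes
    //   BFMWri %dst, %src, #16, #15
    // with immr = (32 - 16) % 32 and imms = 16 - 1, i.e. "bfi %dst, %src,
    // #16, #16".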
1741327952Sdim    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1742327952Sdim      return false;
1743327952Sdim
1744329983Sdim    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1745327952Sdim    unsigned LSB = I.getOperand(3).getImm();
1746327952Sdim    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1747329983Sdim    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1748327952Sdim    MachineInstrBuilder(MF, I).addImm(Width - 1);
1749327952Sdim
1750329983Sdim    if (DstSize < 64) {
1751329983Sdim      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1752329983Sdim             "unexpected G_INSERT types");
1753329983Sdim      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1754329983Sdim    }
1755329983Sdim
1756353358Sdim    Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1757327952Sdim    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1758327952Sdim            TII.get(AArch64::SUBREG_TO_REG))
1759327952Sdim        .addDef(SrcReg)
1760327952Sdim        .addImm(0)
1761327952Sdim        .addUse(I.getOperand(2).getReg())
1762327952Sdim        .addImm(AArch64::sub_32);
1763327952Sdim    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1764327952Sdim                                 AArch64::GPR32RegClass, MRI);
1765327952Sdim    I.getOperand(2).setReg(SrcReg);
1766327952Sdim
1767327952Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1768327952Sdim  }
1769311116Sdim  case TargetOpcode::G_FRAME_INDEX: {
1770311116Sdim    // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1771311116Sdim    if (Ty != LLT::pointer(0, 64)) {
1772341825Sdim      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1773341825Sdim                        << ", expected: " << LLT::pointer(0, 64) << '\n');
1774311116Sdim      return false;
1775311116Sdim    }
1776311116Sdim    I.setDesc(TII.get(AArch64::ADDXri));
1777311116Sdim
1778311116Sdim    // MOs for a #0 shifted immediate.
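    // The result is "ADDXri %dst, <fi>, 0, 0": the slot's address with a
    // zero, unshifted offset.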
1779311116Sdim    I.addOperand(MachineOperand::CreateImm(0));
1780311116Sdim    I.addOperand(MachineOperand::CreateImm(0));
1781311116Sdim
1782311116Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1783311116Sdim  }
1784311116Sdim
1785311116Sdim  case TargetOpcode::G_GLOBAL_VALUE: {
1786311116Sdim    auto GV = I.getOperand(1).getGlobal();
1787360784Sdim    if (GV->isThreadLocal())
1788360784Sdim      return selectTLSGlobalValue(I, MRI);
1789360784Sdim
1790360784Sdim    unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1791311116Sdim    if (OpFlags & AArch64II::MO_GOT) {
1792311116Sdim      I.setDesc(TII.get(AArch64::LOADgot));
1793311116Sdim      I.getOperand(1).setTargetFlags(OpFlags);
1794328381Sdim    } else if (TM.getCodeModel() == CodeModel::Large) {
1795328381Sdim      // Materialize the global using movz/movk instructions.
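      // This expands to a MOVZ of bits 0-15 followed by MOVKs of bits 16-31,
      // 32-47 and 48-63; see materializeLargeCMVal above.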
1796341825Sdim      materializeLargeCMVal(I, GV, OpFlags);
1797328381Sdim      I.eraseFromParent();
1798328381Sdim      return true;
1799344779Sdim    } else if (TM.getCodeModel() == CodeModel::Tiny) {
1800344779Sdim      I.setDesc(TII.get(AArch64::ADR));
1801344779Sdim      I.getOperand(1).setTargetFlags(OpFlags);
1802311116Sdim    } else {
1803311116Sdim      I.setDesc(TII.get(AArch64::MOVaddr));
1804311116Sdim      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1805311116Sdim      MachineInstrBuilder MIB(MF, I);
1806311116Sdim      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1807311116Sdim                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1808311116Sdim    }
1809311116Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1810311116Sdim  }
1811311116Sdim
1812353358Sdim  case TargetOpcode::G_ZEXTLOAD:
1813311116Sdim  case TargetOpcode::G_LOAD:
1814311116Sdim  case TargetOpcode::G_STORE: {
1815353358Sdim    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1816353358Sdim    MachineIRBuilder MIB(I);
1817353358Sdim
1818311116Sdim    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1819311116Sdim
1820311116Sdim    if (PtrTy != LLT::pointer(0, 64)) {
1821341825Sdim      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1822341825Sdim                        << ", expected: " << LLT::pointer(0, 64) << '\n');
1823311116Sdim      return false;
1824311116Sdim    }
1825311116Sdim
1826321369Sdim    auto &MemOp = **I.memoperands_begin();
1827360784Sdim    if (MemOp.isAtomic()) {
1828360784Sdim      // For now we just support s8 acquire loads to be able to compile stack
1829360784Sdim      // protector code.
1830360784Sdim      if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1831360784Sdim          MemOp.getSize() == 1) {
1832360784Sdim        I.setDesc(TII.get(AArch64::LDARB));
1833360784Sdim        return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1834360784Sdim      }
1835360784Sdim      LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1836321369Sdim      return false;
1837321369Sdim    }
1838341825Sdim    unsigned MemSizeInBits = MemOp.getSize() * 8;
1839321369Sdim
1840353358Sdim    const Register PtrReg = I.getOperand(1).getReg();
1841311116Sdim#ifndef NDEBUG
1842321369Sdim    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1843311116Sdim    // Sanity-check the pointer register.
1844311116Sdim    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1845311116Sdim           "Load/Store pointer operand isn't a GPR");
1846311116Sdim    assert(MRI.getType(PtrReg).isPointer() &&
1847311116Sdim           "Load/Store pointer operand isn't a pointer");
1848311116Sdim#endif
1849311116Sdim
1850353358Sdim    const Register ValReg = I.getOperand(0).getReg();
1851311116Sdim    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1852311116Sdim
1853311116Sdim    const unsigned NewOpc =
1854341825Sdim        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1855311116Sdim    if (NewOpc == I.getOpcode())
1856311116Sdim      return false;
1857311116Sdim
1858311116Sdim    I.setDesc(TII.get(NewOpc));
1859311116Sdim
1860321369Sdim    uint64_t Offset = 0;
1861321369Sdim    auto *PtrMI = MRI.getVRegDef(PtrReg);
1862321369Sdim
    // Try to fold a G_PTR_ADD into our unsigned immediate addressing mode.
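    // E.g. (sketch):
    //   %addr:gpr(p0) = G_PTR_ADD %base, 16
    //   %val:gpr(s64) = G_LOAD %addr :: (load 8)
    // folds to "LDRXui %base, #2"; the immediate is scaled by the access
    // size, so 16 / 8 == 2.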
1864360784Sdim    if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
1865321369Sdim      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1866321369Sdim        int64_t Imm = *COff;
1867341825Sdim        const unsigned Size = MemSizeInBits / 8;
1868321369Sdim        const unsigned Scale = Log2_32(Size);
1869321369Sdim        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1870360784Sdim          Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1871321369Sdim          I.getOperand(1).setReg(Ptr2Reg);
1872321369Sdim          PtrMI = MRI.getVRegDef(Ptr2Reg);
1873321369Sdim          Offset = Imm / Size;
1874321369Sdim        }
1875321369Sdim      }
1876321369Sdim    }
1877321369Sdim
1878321369Sdim    // If we haven't folded anything into our addressing mode yet, try to fold
1879321369Sdim    // a frame index into the base+offset.
1880321369Sdim    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1881321369Sdim      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1882321369Sdim
1883321369Sdim    I.addOperand(MachineOperand::CreateImm(Offset));
1884321369Sdim
1885321369Sdim    // If we're storing a 0, use WZR/XZR.
1886321369Sdim    if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1887321369Sdim      if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1888321369Sdim        if (I.getOpcode() == AArch64::STRWui)
1889321369Sdim          I.getOperand(0).setReg(AArch64::WZR);
1890321369Sdim        else if (I.getOpcode() == AArch64::STRXui)
1891321369Sdim          I.getOperand(0).setReg(AArch64::XZR);
1892321369Sdim      }
1893321369Sdim    }
1894321369Sdim
1895353358Sdim    if (IsZExtLoad) {
      // The zextload from a smaller type to i32 should be handled by the
      // importer.
1897353358Sdim      if (MRI.getType(ValReg).getSizeInBits() != 64)
1898353358Sdim        return false;
      // If we have a ZEXTLOAD then change the load's type to be a narrower
      // reg and zero-extend with SUBREG_TO_REG.
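      // E.g. (sketch) "%d:gpr(s64) = G_ZEXTLOAD %p :: (load 4)" becomes:
      //   %ld:gpr32 = LDRWui %p, 0
      //   %d:gpr64 = SUBREG_TO_REG 0, %ld, %subreg.sub_32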
1901353358Sdim      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1902353358Sdim      Register DstReg = I.getOperand(0).getReg();
1903353358Sdim      I.getOperand(0).setReg(LdReg);
1904353358Sdim
1905353358Sdim      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1906353358Sdim      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1907353358Sdim          .addImm(0)
1908353358Sdim          .addUse(LdReg)
1909353358Sdim          .addImm(AArch64::sub_32);
1910353358Sdim      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1911353358Sdim      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1912353358Sdim                                          MRI);
1913353358Sdim    }
1914311116Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1915311116Sdim  }
1916311116Sdim
1917321369Sdim  case TargetOpcode::G_SMULH:
1918321369Sdim  case TargetOpcode::G_UMULH: {
1919311116Sdim    // Reject the various things we don't support yet.
1920311116Sdim    if (unsupportedBinOp(I, RBI, MRI, TRI))
1921311116Sdim      return false;
1922311116Sdim
1923353358Sdim    const Register DefReg = I.getOperand(0).getReg();
1924311116Sdim    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1925311116Sdim
1926311116Sdim    if (RB.getID() != AArch64::GPRRegBankID) {
1927341825Sdim      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1928311116Sdim      return false;
1929311116Sdim    }
1930311116Sdim
1931321369Sdim    if (Ty != LLT::scalar(64)) {
1932341825Sdim      LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1933341825Sdim                        << ", expected: " << LLT::scalar(64) << '\n');
1934311116Sdim      return false;
1935311116Sdim    }
1936311116Sdim
1937321369Sdim    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1938321369Sdim                                                             : AArch64::UMULHrr;
1939311116Sdim    I.setDesc(TII.get(NewOpc));
1940311116Sdim
1941311116Sdim    // Now that we selected an opcode, we need to constrain the register
1942311116Sdim    // operands to use appropriate classes.
1943311116Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1944311116Sdim  }
1945311116Sdim  case TargetOpcode::G_FADD:
1946311116Sdim  case TargetOpcode::G_FSUB:
1947311116Sdim  case TargetOpcode::G_FMUL:
1948311116Sdim  case TargetOpcode::G_FDIV:
1949311116Sdim
1950353358Sdim  case TargetOpcode::G_ASHR:
1951353358Sdim    if (MRI.getType(I.getOperand(0).getReg()).isVector())
1952353358Sdim      return selectVectorASHR(I, MRI);
1953353358Sdim    LLVM_FALLTHROUGH;
1954353358Sdim  case TargetOpcode::G_SHL:
1955353358Sdim    if (Opcode == TargetOpcode::G_SHL &&
1956353358Sdim        MRI.getType(I.getOperand(0).getReg()).isVector())
1957353358Sdim      return selectVectorSHL(I, MRI);
1958353358Sdim    LLVM_FALLTHROUGH;
1959311116Sdim  case TargetOpcode::G_OR:
1960360784Sdim  case TargetOpcode::G_LSHR: {
1961311116Sdim    // Reject the various things we don't support yet.
1962311116Sdim    if (unsupportedBinOp(I, RBI, MRI, TRI))
1963311116Sdim      return false;
1964311116Sdim
1965311116Sdim    const unsigned OpSize = Ty.getSizeInBits();
1966311116Sdim
1967353358Sdim    const Register DefReg = I.getOperand(0).getReg();
1968311116Sdim    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1969311116Sdim
1970311116Sdim    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1971311116Sdim    if (NewOpc == I.getOpcode())
1972311116Sdim      return false;
1973311116Sdim
1974311116Sdim    I.setDesc(TII.get(NewOpc));
1975311116Sdim    // FIXME: Should the type be always reset in setDesc?
1976311116Sdim
1977311116Sdim    // Now that we selected an opcode, we need to constrain the register
1978311116Sdim    // operands to use appropriate classes.
1979311116Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1980311116Sdim  }
1981311116Sdim
1982360784Sdim  case TargetOpcode::G_PTR_ADD: {
1983360784Sdim    MachineIRBuilder MIRBuilder(I);
1984360784Sdim    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1985360784Sdim            MIRBuilder);
1986360784Sdim    I.eraseFromParent();
1987360784Sdim    return true;
1988360784Sdim  }
1989353358Sdim  case TargetOpcode::G_UADDO: {
1990353358Sdim    // TODO: Support other types.
1991353358Sdim    unsigned OpSize = Ty.getSizeInBits();
1992353358Sdim    if (OpSize != 32 && OpSize != 64) {
1993353358Sdim      LLVM_DEBUG(
1994353358Sdim          dbgs()
1995353358Sdim          << "G_UADDO currently only supported for 32 and 64 b types.\n");
1996353358Sdim      return false;
1997353358Sdim    }
1998353358Sdim
1999353358Sdim    // TODO: Support vectors.
2000353358Sdim    if (Ty.isVector()) {
2001353358Sdim      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2002353358Sdim      return false;
2003353358Sdim    }
2004353358Sdim
    // Emit the add, setting the condition flags.
2006353358Sdim    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
2007353358Sdim    MachineIRBuilder MIRBuilder(I);
2008353358Sdim    auto AddsMI = MIRBuilder.buildInstr(
2009353358Sdim        AddsOpc, {I.getOperand(0).getReg()},
2010353358Sdim        {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
2011353358Sdim    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
2012353358Sdim
2013353358Sdim    // Now, put the overflow result in the register given by the first operand
2014353358Sdim    // to the G_UADDO. CSINC increments the result when the predicate is false,
2015353358Sdim    // so to get the increment when it's true, we need to use the inverse. In
2016353358Sdim    // this case, we want to increment when carry is set.
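    // That is "cset %ovf, hs", encoded as "CSINC %ovf, wzr, wzr, lo": it
    // yields 1 when the carry is set and 0 otherwise.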
2017353358Sdim    auto CsetMI = MIRBuilder
2018353358Sdim                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2019353358Sdim                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
2020353358Sdim                      .addImm(getInvertedCondCode(AArch64CC::HS));
2021353358Sdim    constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2022353358Sdim    I.eraseFromParent();
2023353358Sdim    return true;
2024353358Sdim  }
2025353358Sdim
2026321369Sdim  case TargetOpcode::G_PTR_MASK: {
2027321369Sdim    uint64_t Align = I.getOperand(2).getImm();
2028321369Sdim    if (Align >= 64 || Align == 0)
2029321369Sdim      return false;
2030321369Sdim
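    // The immediate is log2 of the alignment, so e.g. Align == 4 clears the
    // low four bits: Mask == ~0xF, encoded below as a logical immediate for
    // ANDXri.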
2031321369Sdim    uint64_t Mask = ~((1ULL << Align) - 1);
2032321369Sdim    I.setDesc(TII.get(AArch64::ANDXri));
2033321369Sdim    I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
2034321369Sdim
2035321369Sdim    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2036321369Sdim  }
2037311116Sdim  case TargetOpcode::G_PTRTOINT:
2038311116Sdim  case TargetOpcode::G_TRUNC: {
2039311116Sdim    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2040311116Sdim    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2041311116Sdim
2042353358Sdim    const Register DstReg = I.getOperand(0).getReg();
2043353358Sdim    const Register SrcReg = I.getOperand(1).getReg();
2044311116Sdim
2045311116Sdim    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2046311116Sdim    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2047311116Sdim
2048311116Sdim    if (DstRB.getID() != SrcRB.getID()) {
2049341825Sdim      LLVM_DEBUG(
2050341825Sdim          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2051311116Sdim      return false;
2052311116Sdim    }
2053311116Sdim
2054311116Sdim    if (DstRB.getID() == AArch64::GPRRegBankID) {
2055311116Sdim      const TargetRegisterClass *DstRC =
2056311116Sdim          getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2057311116Sdim      if (!DstRC)
2058311116Sdim        return false;
2059311116Sdim
2060311116Sdim      const TargetRegisterClass *SrcRC =
2061311116Sdim          getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2062311116Sdim      if (!SrcRC)
2063311116Sdim        return false;
2064311116Sdim
2065311116Sdim      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2066311116Sdim          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2067341825Sdim        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2068311116Sdim        return false;
2069311116Sdim      }
2070311116Sdim
2071311116Sdim      if (DstRC == SrcRC) {
2072311116Sdim        // Nothing to be done
2073321369Sdim      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2074321369Sdim                 SrcTy == LLT::scalar(64)) {
2075321369Sdim        llvm_unreachable("TableGen can import this case");
2076321369Sdim        return false;
2077311116Sdim      } else if (DstRC == &AArch64::GPR32RegClass &&
2078311116Sdim                 SrcRC == &AArch64::GPR64RegClass) {
2079311116Sdim        I.getOperand(1).setSubReg(AArch64::sub_32);
2080311116Sdim      } else {
2081341825Sdim        LLVM_DEBUG(
2082341825Sdim            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2083311116Sdim        return false;
2084311116Sdim      }
2085311116Sdim
2086311116Sdim      I.setDesc(TII.get(TargetOpcode::COPY));
2087311116Sdim      return true;
2088311116Sdim    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2089311116Sdim      if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2090311116Sdim        I.setDesc(TII.get(AArch64::XTNv4i16));
2091311116Sdim        constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2092311116Sdim        return true;
2093311116Sdim      }
2094360784Sdim
2095360784Sdim      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2096360784Sdim        MachineIRBuilder MIB(I);
2097360784Sdim        MachineInstr *Extract = emitExtractVectorElt(
2098360784Sdim            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2099360784Sdim        if (!Extract)
2100360784Sdim          return false;
2101360784Sdim        I.eraseFromParent();
2102360784Sdim        return true;
2103360784Sdim      }
2104311116Sdim    }
2105311116Sdim
2106311116Sdim    return false;
2107311116Sdim  }
2108311116Sdim
2109311116Sdim  case TargetOpcode::G_ANYEXT: {
2110353358Sdim    const Register DstReg = I.getOperand(0).getReg();
2111353358Sdim    const Register SrcReg = I.getOperand(1).getReg();
2112311116Sdim
2113311116Sdim    const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2114311116Sdim    if (RBDst.getID() != AArch64::GPRRegBankID) {
2115341825Sdim      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2116341825Sdim                        << ", expected: GPR\n");
2117311116Sdim      return false;
2118311116Sdim    }
2119311116Sdim
2120311116Sdim    const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2121311116Sdim    if (RBSrc.getID() != AArch64::GPRRegBankID) {
2122341825Sdim      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2123341825Sdim                        << ", expected: GPR\n");
2124311116Sdim      return false;
2125311116Sdim    }
2126311116Sdim
2127311116Sdim    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2128311116Sdim
2129311116Sdim    if (DstSize == 0) {
2130341825Sdim      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2131311116Sdim      return false;
2132311116Sdim    }
2133311116Sdim
2134311116Sdim    if (DstSize != 64 && DstSize > 32) {
2135341825Sdim      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2136341825Sdim                        << ", expected: 32 or 64\n");
2137311116Sdim      return false;
2138311116Sdim    }
2139311116Sdim    // At this point G_ANYEXT is just like a plain COPY, but we need
2140311116Sdim    // to explicitly form the 64-bit value if any.
2141311116Sdim    if (DstSize > 32) {
2142353358Sdim      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2143311116Sdim      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2144311116Sdim          .addDef(ExtSrc)
2145311116Sdim          .addImm(0)
2146311116Sdim          .addUse(SrcReg)
2147311116Sdim          .addImm(AArch64::sub_32);
2148311116Sdim      I.getOperand(1).setReg(ExtSrc);
2149311116Sdim    }
2150311116Sdim    return selectCopy(I, TII, MRI, TRI, RBI);
2151311116Sdim  }
2152311116Sdim
2153311116Sdim  case TargetOpcode::G_ZEXT:
2154311116Sdim  case TargetOpcode::G_SEXT: {
2155311116Sdim    unsigned Opcode = I.getOpcode();
2156360784Sdim    const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2157353358Sdim    const Register DefReg = I.getOperand(0).getReg();
2158353358Sdim    const Register SrcReg = I.getOperand(1).getReg();
2159360784Sdim    const LLT DstTy = MRI.getType(DefReg);
2160360784Sdim    const LLT SrcTy = MRI.getType(SrcReg);
2161360784Sdim    unsigned DstSize = DstTy.getSizeInBits();
2162360784Sdim    unsigned SrcSize = SrcTy.getSizeInBits();
2163311116Sdim
2164360784Sdim    if (DstTy.isVector())
2165360784Sdim      return false; // Should be handled by imported patterns.
2166360784Sdim
2167360784Sdim    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2168360784Sdim               AArch64::GPRRegBankID &&
2169360784Sdim           "Unexpected ext regbank");
2170360784Sdim
2171360784Sdim    MachineIRBuilder MIB(I);
2172360784Sdim    MachineInstr *ExtI;
2173360784Sdim
    // First, check if we're extending the result of a load whose destination
    // type is smaller than 32 bits; in that case this zext is redundant.
    // GPR32 is the smallest GPR register on AArch64, and all smaller loads
    // automatically zero-extend the upper bits. E.g.
2178360784Sdim    // %v(s8) = G_LOAD %p, :: (load 1)
2179360784Sdim    // %v2(s32) = G_ZEXT %v(s8)
2180360784Sdim    if (!IsSigned) {
2181360784Sdim      auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2182360784Sdim      if (LoadMI &&
2183360784Sdim          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2184360784Sdim        const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2185360784Sdim        unsigned BytesLoaded = MemOp->getSize();
2186360784Sdim        if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2187360784Sdim          return selectCopy(I, TII, MRI, TRI, RBI);
2188360784Sdim      }
2189311116Sdim    }
2190311116Sdim
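    // For a 64-bit destination, widen the source with SUBREG_TO_REG and then
    // extend with SBFMXri/UBFMXri; e.g. an s32 -> s64 G_SEXT becomes (sketch)
    //   SBFMXri %dst, %widened_src, 0, 31
    // which is the "sxtw" alias.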
2191360784Sdim    if (DstSize == 64) {
2192311116Sdim      // FIXME: Can we avoid manually doing this?
2193311116Sdim      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2194341825Sdim        LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2195341825Sdim                          << " operand\n");
2196311116Sdim        return false;
2197311116Sdim      }
2198311116Sdim
2199360784Sdim      auto SubregToReg =
2200360784Sdim          MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2201360784Sdim              .addImm(0)
2202360784Sdim              .addUse(SrcReg)
2203360784Sdim              .addImm(AArch64::sub_32);
2204311116Sdim
2205360784Sdim      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2206360784Sdim                             {DefReg}, {SubregToReg})
2207360784Sdim                  .addImm(0)
2208360784Sdim                  .addImm(SrcSize - 1);
2209360784Sdim    } else if (DstSize <= 32) {
2210360784Sdim      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2211360784Sdim                             {DefReg}, {SrcReg})
2212360784Sdim                  .addImm(0)
2213360784Sdim                  .addImm(SrcSize - 1);
2214311116Sdim    } else {
2215311116Sdim      return false;
2216311116Sdim    }
2217311116Sdim
2218311116Sdim    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2219311116Sdim    I.eraseFromParent();
2220311116Sdim    return true;
2221311116Sdim  }
2222311116Sdim
2223311116Sdim  case TargetOpcode::G_SITOFP:
2224311116Sdim  case TargetOpcode::G_UITOFP:
2225311116Sdim  case TargetOpcode::G_FPTOSI:
2226311116Sdim  case TargetOpcode::G_FPTOUI: {
2227311116Sdim    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2228311116Sdim              SrcTy = MRI.getType(I.getOperand(1).getReg());
2229311116Sdim    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2230311116Sdim    if (NewOpc == Opcode)
2231311116Sdim      return false;
2232311116Sdim
2233311116Sdim    I.setDesc(TII.get(NewOpc));
2234311116Sdim    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2235311116Sdim
2236311116Sdim    return true;
2237311116Sdim  }
2238311116Sdim
2239311116Sdim
2240311116Sdim  case TargetOpcode::G_INTTOPTR:
2241327952Sdim    // The importer is currently unable to import pointer types since they
2242327952Sdim    // didn't exist in SelectionDAG.
2243311116Sdim    return selectCopy(I, TII, MRI, TRI, RBI);
2244311116Sdim
2245327952Sdim  case TargetOpcode::G_BITCAST:
2246327952Sdim    // Imported SelectionDAG rules can handle every bitcast except those that
2247327952Sdim    // bitcast from a type to the same type. Ideally, these shouldn't occur
2248353358Sdim    // but we might not run an optimizer that deletes them. The other exception
2249353358Sdim    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2250353358Sdim    // of them.
2251353358Sdim    return selectCopy(I, TII, MRI, TRI, RBI);
2252311116Sdim
2253311116Sdim  case TargetOpcode::G_SELECT: {
2254311116Sdim    if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2255341825Sdim      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2256341825Sdim                        << ", expected: " << LLT::scalar(1) << '\n');
2257311116Sdim      return false;
2258311116Sdim    }
2259311116Sdim
2260353358Sdim    const Register CondReg = I.getOperand(1).getReg();
2261353358Sdim    const Register TReg = I.getOperand(2).getReg();
2262353358Sdim    const Register FReg = I.getOperand(3).getReg();
2263311116Sdim
2264353358Sdim    if (tryOptSelect(I))
2265353358Sdim      return true;
2266311116Sdim
    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
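    // Test bit 0 of the condition and select on NE (a sketch):
    //   ANDSWri wzr, %cond, #1
    //   CSEL %dst, %t, %f, ne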
2268311116Sdim    MachineInstr &TstMI =
2269311116Sdim        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2270311116Sdim             .addDef(AArch64::WZR)
2271311116Sdim             .addUse(CondReg)
2272311116Sdim             .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2273311116Sdim
2274311116Sdim    MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2275311116Sdim                                .addDef(I.getOperand(0).getReg())
2276311116Sdim                                .addUse(TReg)
2277311116Sdim                                .addUse(FReg)
2278311116Sdim                                .addImm(AArch64CC::NE);
2279311116Sdim
2280311116Sdim    constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2281311116Sdim    constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2282311116Sdim
2283311116Sdim    I.eraseFromParent();
2284311116Sdim    return true;
2285311116Sdim  }
2286311116Sdim  case TargetOpcode::G_ICMP: {
2287353358Sdim    if (Ty.isVector())
2288353358Sdim      return selectVectorICmp(I, MRI);
2289353358Sdim
2290327952Sdim    if (Ty != LLT::scalar(32)) {
2291341825Sdim      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2292341825Sdim                        << ", expected: " << LLT::scalar(32) << '\n');
2293311116Sdim      return false;
2294311116Sdim    }
2295311116Sdim
2296353358Sdim    MachineIRBuilder MIRBuilder(I);
2297353358Sdim    if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2298353358Sdim                            MIRBuilder))
2299311116Sdim      return false;
2300353358Sdim    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2301353358Sdim                    MIRBuilder);
2302311116Sdim    I.eraseFromParent();
2303311116Sdim    return true;
2304311116Sdim  }
2305311116Sdim
2306311116Sdim  case TargetOpcode::G_FCMP: {
2307327952Sdim    if (Ty != LLT::scalar(32)) {
2308341825Sdim      LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2309341825Sdim                        << ", expected: " << LLT::scalar(32) << '\n');
2310311116Sdim      return false;
2311311116Sdim    }
2312311116Sdim
2313353358Sdim    unsigned CmpOpc = selectFCMPOpc(I, MRI);
2314353358Sdim    if (!CmpOpc)
2315311116Sdim      return false;
2316311116Sdim
2317311116Sdim    // FIXME: regbank
2318311116Sdim
2319311116Sdim    AArch64CC::CondCode CC1, CC2;
2320311116Sdim    changeFCMPPredToAArch64CC(
2321311116Sdim        (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2322311116Sdim
2323353358Sdim    // Partially build the compare. Decide if we need to add a use for the
2324353358Sdim    // third operand based off whether or not we're comparing against 0.0.
2325353358Sdim    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2326353358Sdim                     .addUse(I.getOperand(2).getReg());
2327311116Sdim
2328353358Sdim    // If we don't have an immediate compare, then we need to add a use of the
2329353358Sdim    // register which wasn't used for the immediate.
2330353358Sdim    // Note that the immediate will always be the last operand.
2331353358Sdim    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2332353358Sdim      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2333353358Sdim
2334353358Sdim    const Register DefReg = I.getOperand(0).getReg();
2335353358Sdim    Register Def1Reg = DefReg;
2336311116Sdim    if (CC2 != AArch64CC::AL)
2337311116Sdim      Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2338311116Sdim
2339311116Sdim    MachineInstr &CSetMI =
2340311116Sdim        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2341311116Sdim             .addDef(Def1Reg)
2342311116Sdim             .addUse(AArch64::WZR)
2343311116Sdim             .addUse(AArch64::WZR)
2344321369Sdim             .addImm(getInvertedCondCode(CC1));
2345311116Sdim
2346311116Sdim    if (CC2 != AArch64CC::AL) {
2347353358Sdim      Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2348311116Sdim      MachineInstr &CSet2MI =
2349311116Sdim          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2350311116Sdim               .addDef(Def2Reg)
2351311116Sdim               .addUse(AArch64::WZR)
2352311116Sdim               .addUse(AArch64::WZR)
2353321369Sdim               .addImm(getInvertedCondCode(CC2));
2354311116Sdim      MachineInstr &OrMI =
2355311116Sdim          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2356311116Sdim               .addDef(DefReg)
2357311116Sdim               .addUse(Def1Reg)
2358311116Sdim               .addUse(Def2Reg);
2359311116Sdim      constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2360311116Sdim      constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2361311116Sdim    }
2362353358Sdim    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2363311116Sdim    constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2364311116Sdim
2365311116Sdim    I.eraseFromParent();
2366311116Sdim    return true;
2367311116Sdim  }
2368321369Sdim  case TargetOpcode::G_VASTART:
2369321369Sdim    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2370321369Sdim                                : selectVaStartAAPCS(I, MF, MRI);
2371353358Sdim  case TargetOpcode::G_INTRINSIC:
2372353358Sdim    return selectIntrinsic(I, MRI);
2373341825Sdim  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2374353358Sdim    return selectIntrinsicWithSideEffects(I, MRI);
2375341825Sdim  case TargetOpcode::G_IMPLICIT_DEF: {
2376321369Sdim    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2377341825Sdim    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2378353358Sdim    const Register DstReg = I.getOperand(0).getReg();
2379341825Sdim    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2380341825Sdim    const TargetRegisterClass *DstRC =
2381341825Sdim        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2382341825Sdim    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2383321369Sdim    return true;
2384311116Sdim  }
2385341825Sdim  case TargetOpcode::G_BLOCK_ADDR: {
2386341825Sdim    if (TM.getCodeModel() == CodeModel::Large) {
2387341825Sdim      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2388341825Sdim      I.eraseFromParent();
2389341825Sdim      return true;
2390341825Sdim    } else {
2391341825Sdim      I.setDesc(TII.get(AArch64::MOVaddrBA));
2392341825Sdim      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2393341825Sdim                           I.getOperand(0).getReg())
2394341825Sdim                       .addBlockAddress(I.getOperand(1).getBlockAddress(),
2395341825Sdim                                        /* Offset */ 0, AArch64II::MO_PAGE)
2396341825Sdim                       .addBlockAddress(
2397341825Sdim                           I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2398341825Sdim                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2399341825Sdim      I.eraseFromParent();
2400341825Sdim      return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2401341825Sdim    }
2402341825Sdim  }
2403353358Sdim  case TargetOpcode::G_INTRINSIC_TRUNC:
2404353358Sdim    return selectIntrinsicTrunc(I, MRI);
2405353358Sdim  case TargetOpcode::G_INTRINSIC_ROUND:
2406353358Sdim    return selectIntrinsicRound(I, MRI);
2407344779Sdim  case TargetOpcode::G_BUILD_VECTOR:
2408344779Sdim    return selectBuildVector(I, MRI);
2409344779Sdim  case TargetOpcode::G_MERGE_VALUES:
2410344779Sdim    return selectMergeValues(I, MRI);
2411353358Sdim  case TargetOpcode::G_UNMERGE_VALUES:
2412353358Sdim    return selectUnmergeValues(I, MRI);
2413353358Sdim  case TargetOpcode::G_SHUFFLE_VECTOR:
2414353358Sdim    return selectShuffleVector(I, MRI);
2415353358Sdim  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2416353358Sdim    return selectExtractElt(I, MRI);
2417353358Sdim  case TargetOpcode::G_INSERT_VECTOR_ELT:
2418353358Sdim    return selectInsertElt(I, MRI);
2419353358Sdim  case TargetOpcode::G_CONCAT_VECTORS:
2420353358Sdim    return selectConcatVectors(I, MRI);
2421353358Sdim  case TargetOpcode::G_JUMP_TABLE:
2422353358Sdim    return selectJumpTable(I, MRI);
2423341825Sdim  }
2424311116Sdim
2425311116Sdim  return false;
2426311116Sdim}
2427321369Sdim
2428353358Sdimbool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2429353358Sdim                                            MachineRegisterInfo &MRI) const {
2430353358Sdim  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2431353358Sdim  Register JTAddr = I.getOperand(0).getReg();
2432353358Sdim  unsigned JTI = I.getOperand(1).getIndex();
2433353358Sdim  Register Index = I.getOperand(2).getReg();
2434353358Sdim  MachineIRBuilder MIB(I);
2435344779Sdim
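  // The lowered sequence is a jump-table dispatch pseudo plus an indirect
  // branch, roughly (virtual register names are illustrative):
  //
  //   %target:gpr64, %scratch:gpr64sp = JumpTableDest32 %jt_addr, %index, jti
  //   BR %target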
  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
                 {JTAddr, Index})
      .addJumpTableIndex(JTI);

  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectJumpTable(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
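  // After pseudo-expansion this becomes something like the following, with
  // the label and register purely illustrative:
  //
  //   adrp x8, .LJTI0_0
  //   add  x8, x8, :lo12:.LJTI0_0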
  MachineIRBuilder MIB(I);
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  MF.getFrameInfo().setAdjustsStack(true);

  const GlobalValue &GV = *I.getOperand(1).getGlobal();
  MachineIRBuilder MIB(I);

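  // Shape of the emitted code under the MachO TLV calling convention (the
  // vreg names are illustrative; x0 is fixed by the convention):
  //
  //   $x0 = LOADgot target-flags(aarch64-tls) @gv   ; TLV descriptor
  //   %fn = LDRXui $x0, 0                           ; thunk to call
  //   BLR %fn, implicit-def $x0                     ; variable address in x0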
  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
      .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);

  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {Register(AArch64::X0)})
                  .addImm(0);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
  // silly).
  MIB.buildInstr(AArch64::BLR, {}, {Load})
      .addDef(AArch64::X0, RegState::Implicit)
      .addRegMask(TRI.getTLSCallPreservedMask());

  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
                               MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
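  // G_INTRINSIC_TRUNC maps directly onto the FRINTZ (round toward zero)
  // family; e.g. s32 selects FRINTZSr and <4 x s32> selects FRINTZv4f32.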
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
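  // G_INTRINSIC_ROUND maps onto FRINTA (round to nearest with ties away from
  // zero); e.g. s64 selects FRINTADr and <2 x s64> selects FRINTAv2f64.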
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
  // Third index is cc opcode:
  // 0 == eq
  // 1 == ugt
  // 2 == uge
  // 3 == ult
  // 4 == ule
  // 5 == sgt
  // 6 == sge
  // 7 == slt
  // 8 == sle
  // ne is done by negating 'eq' result.

  // The table below assumes that for some comparisons the operands will be
  // commuted.
  // ult op == commute + ugt op
  // ule op == commute + uge op
  // slt op == commute + sgt op
  // sle op == commute + sge op
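  // Worked example: ult <4 x s32> yields PredIdx == 3 with SwapOperands set;
  // with EltIdx == Log2_32(32 / 8) == 2 and NumEltsIdx == Log2_32(4 / 2) == 1
  // computed below, the table selects CMHIv4i32 (the ugt opcode) applied to
  // the commuted operands.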
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // This table obviously should be tablegen'd when we have our GISel native
  // tablegen selector.

  static const unsigned OpcTable[4][4][9] = {
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
      },
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
  };
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode\n");
    return false;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *SrcRC =
      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
  if (!SrcRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return false;
  }

  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
  if (SrcTy.getSizeInBits() == 128)
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;

  if (SwapOperands)
    std::swap(SrcReg, Src2Reg);

  MachineIRBuilder MIB(I);
  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);

  // Invert if we had a 'ne' cc.
  if (NotOpc) {
    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  } else {
    MIB.buildCopy(DstReg, Cmp.getReg(0));
  }
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

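  // The scalar is dropped into lane 0 of an undefined vector with a
  // subregister insert; e.g. for a 32-bit element the emitted MIR is roughly
  // (vreg names illustrative):
  //
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %vec:fpr128 = INSERT_SUBREG %undef, %scalar, %subreg.ssub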
  auto BuildFn = [&](unsigned SubregIndex) {
    auto Ins =
        MIRBuilder
            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
            .addImm(SubregIndex);
    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
    return &*Ins;
  };

  switch (EltSize) {
  case 16:
    return BuildFn(AArch64::hsub);
  case 32:
    return BuildFn(AArch64::ssub);
  case 64:
    return BuildFn(AArch64::dsub);
  default:
    return nullptr;
  }
}

bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    MachineIRBuilder MIB(I);
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI =
        emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

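  // Merge two s32s into an s64 by widening both halves with SUBREG_TO_REG and
  // inserting the high half with a bitfield move, i.e. (illustrative vregs):
  //
  //   %lo:gpr64 = SUBREG_TO_REG 0, %op1, %subreg.sub_32
  //   %hi:gpr64 = SUBREG_TO_REG 0, %op2, %subreg.sub_32
  //   %dst:gpr64 = BFMXri %lo, %hi, 32, 31   ; insert %hi[31:0] at bit 32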
  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addImm(0)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm.
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  switch (EltSize) {
  case 16:
    CopyOpc = AArch64::CPYi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::CPYi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::CPYi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    // Unknown size, bail out.
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}

MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // If the lane index is 0, we just use a subregister COPY.
  if (LaneIdx == 0) {
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
    return &*Copy;
  }

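  // For a nonzero index we need a lane copy, e.g. extracting lane 1 of a
  // <4 x s32> (vregs illustrative):
  //
  //   %dst:fpr32 = CPYi32 %vec:fpr128, 1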
  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);

  // Make sure that we actually constrain the initial copy.
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  return LaneCopyMI;
}

bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");

  // Need the lane index to determine the correct copy opcode.
  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the index to extract from.
  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value;

  MachineIRBuilder MIRBuilder(I);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
                                               LaneIdx, MIRBuilder);
  if (!Extract)
    return false;

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge\n");
    return false;
  }

  MachineIRBuilder MIB(I);

  // We implement a split vector operation by treating the sub-vectors as
  // scalars and extracting them.
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    MachineInstr *Extract =
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    if (!Extract)
      return false;
  }
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  MachineIRBuilder MIB(I);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

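  // E.g. unmerging a <4 x s32> into four s32s copies lane 0 out with a plain
  // subregister copy and lanes 1-3 with CPYi32 lane copies, so only three
  // (possibly widened) source registers are needed.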
  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(AArch64::dsub);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

unsigned
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
  if (Align == 0)
    Align = MF.getDataLayout().getTypeAllocSize(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Align);
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());

  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

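  // The constant is addressed ADRP + low-12-bits offset, i.e. roughly the
  // following for a 16-byte constant (label and registers illustrative):
  //
  //   adrp x8, .LCPI0_0
  //   ldr  q0, [x8, :lo12:.LCPI0_0]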
  MachineInstr *LoadMI = nullptr;
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI = &*MIRBuilder
                 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
                 .addConstantPoolIndex(
                     CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType() << '\n');
    return nullptr;
  }
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}

/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}

MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
                                       {AArch64::ADDWrr, AArch64::ADDWri}};
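  // OpcTable is indexed as [Is32Bit][HasImm]; e.g. a 64-bit add whose RHS
  // matches the arithmetic-immediate pattern selects ADDXri, otherwise
  // ADDXrr with the RHS register appended below.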
  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
  auto ImmFns = selectArithImmed(RHS);
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});

  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(AddMI);
  } else {
    AddMI.addUse(RHS.getReg());
  }

  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
  return &*AddMI;
}

MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto ImmFns = selectArithImmed(RHS);
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});

  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(CmpMI);
  } else {
    CmpMI.addUse(RHS.getReg());
  }

  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

MachineInstr *
AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
                                       {AArch64::ANDSWrr, AArch64::ANDSWri}};
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  // We might be able to fold an immediate into the TST. We need to make sure
  // it's a logical immediate though, since ANDS requires that.
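  // E.g. a TST against 0xff can be selected as ANDSWri with the encoded
  // immediate, while 0x1234 is not a valid logical immediate and falls back
  // to ANDSWrr.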
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
  bool IsImmForm = ValAndVReg.hasValue() &&
                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});

  if (IsImmForm)
    TstMI.addImm(
        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
  else
    TstMI.addUse(RHS);

  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  return &*TstMI;
}

MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  // Fold the compare if possible.
  MachineInstr *FoldCmp =
      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
  if (FoldCmp)
    return FoldCmp;

  // Can't fold into a CMN. Just emit a normal compare.
  unsigned CmpOpc = 0;
  Register ZReg;

  LLT CmpTy = MRI.getType(LHS.getReg());
  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
         "Expected scalar or pointer");
  if (CmpTy == LLT::scalar(32)) {
    CmpOpc = AArch64::SUBSWrr;
    ZReg = AArch64::WZR;
  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
    CmpOpc = AArch64::SUBSXrr;
    ZReg = AArch64::XZR;
  } else {
    return nullptr;
  }

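  // The compare is a flag-setting subtract whose value result is discarded,
  // e.g. for s32 (vregs illustrative):
  //
  //   $wzr = SUBSWrr %lhs, %rhs, implicit-def $nzcv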
  // Try to match immediate forms.
  auto ImmFns = selectArithImmed(RHS);
  if (ImmFns)
    CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;

  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(CmpMI);
  } else {
    CmpMI.addUse(RHS.getReg());
  }

  // Make sure that we can constrain the compare that we emitted.
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
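  // E.g. concatenating two <2 x s32> values into a <4 x s32> looks roughly
  // like (vregs illustrative):
  //
  //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, %subreg.dsub
  //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, %subreg.dsub
  //   %dst = INSvi64lane %w1, 1, %w2, 0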
3374353358Sdim  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3375353358Sdim
3376353358Sdim  const LLT Op1Ty = MRI.getType(Op1);
3377353358Sdim  const LLT Op2Ty = MRI.getType(Op2);
3378353358Sdim
3379353358Sdim  if (Op1Ty != Op2Ty) {
3380353358Sdim    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3381353358Sdim    return nullptr;
3382353358Sdim  }
3383353358Sdim  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3384353358Sdim
3385353358Sdim  if (Op1Ty.getSizeInBits() >= 128) {
3386353358Sdim    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3387353358Sdim    return nullptr;
3388353358Sdim  }
3389353358Sdim
3390353358Sdim  // At the moment we just support 64 bit vector concats.
3391353358Sdim  if (Op1Ty.getSizeInBits() != 64) {
3392353358Sdim    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3393353358Sdim    return nullptr;
3394353358Sdim  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}

MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
         "Expected a G_FCONSTANT!");
  MachineOperand &ImmOp = I.getOperand(1);
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  // Only handle 32 and 64 bit defs for now.
  if (DefSize != 32 && DefSize != 64)
    return nullptr;

  // Don't handle null values using FMOV.
  if (ImmOp.getFPImm()->isNullValue())
    return nullptr;

  // Get the immediate representation for the FMOV.
  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
                          : AArch64_AM::getFP64Imm(ImmValAPF);
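  // These helpers succeed only for values encodable in FMOV's 8-bit immediate
  // format, i.e. (+/-)(n/16) * 2^r with 16 <= n <= 31 and -3 <= r <= 4. For
  // example, 1.0, 0.5, 2.0, and 10.0 are encodable; 0.1 is not.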

  // If this is -1, it means the immediate can't be represented as the
  // requested floating point value. Bail.
  if (Imm == -1)
    return nullptr;

  // Update MI to represent the new FMOV instruction, constrain it, and return.
  ImmOp.ChangeToImmediate(Imm);
  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
  I.setDesc(TII.get(MovOpc));
  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  return &I;
}

MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
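  // For example, for intpred(eq) we invert to AArch64CC::NE and emit
  //   csinc w0, wzr, wzr, ne
  // which is the expansion of the "cset w0, eq" alias.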
  auto I = MIRBuilder
               .buildInstr(AArch64::CSINCWr, {DefReg},
                           {Register(AArch64::WZR), Register(AArch64::WZR)})
               .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}

bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between).
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
      return false;

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    CondCode = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                            CondDef->getOperand(1), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  } else {
    // Get the condition code for the select.
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
        CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    // Make sure we'll be able to select the compare.
    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
    if (!CmpOpc)
      return false;

    // Emit a new compare.
    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      Cmp.addUse(CondDef->getOperand(3).getReg());
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  }

  // Emit the select.
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
  auto CSel =
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
          .addImm(CondCode);
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g.:
  //
  // cmn z, y

  // Helper lambda to detect the subtract followed by the compare.
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    // We want to match against SUBs.
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    // Replacing the SUB with an ADD changes how the C and V flags are set, so
    // this is only sound for conditions that depend solely on the Z flag
    // (EQ/NE).
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

    // Make sure that we're getting
    // x = G_SUB 0, y
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return false;

    // This can safely be represented as a CMN.
    return true;
  };

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB.
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (IsCMN(LHSDef, CC))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (IsCMN(RHSDef, CC))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!isUnsignedICMPPred(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1).getReg(),
                   LHSDef->getOperand(2).getReg(), MIRBuilder);
  }

  return nullptr;
}

bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //    %scalar:gpr(s64) = COPY $x0
  //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  //    %cst0:gpr(s32) = G_CONSTANT i32 0
  //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64),
  //                                              %cst0(s32)
  //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                             %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = DUP %scalar
  // We use the regbank of the scalar to determine which kind of dup to use.
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  using namespace TargetOpcode;
  using namespace MIPatternMatch;

  // Begin matching the insert.
  auto *InsMI =
      getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  auto *UndefMI =
      getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
  if (!UndefMI)
    return false;
  // Match the scalar being splatted.
  Register ScalarReg = InsMI->getOperand(2).getReg();
  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
  // Match the index constant 0.
  int64_t Index = 0;
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
    return false;

  // The shuffle's second operand doesn't matter if the mask is all zero.
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
  if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
    return false;

  // We're done, now find out what kind of splat we need.
  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
  LLT EltTy = VecTy.getElementType();
  if (EltTy.getSizeInBits() < 32) {
    LLVM_DEBUG(dbgs() << "Cannot yet optimize splat patterns with < 32b elts");
    return false;
  }
  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
  unsigned Opc = 0;
  if (IsFP) {
    switch (EltTy.getSizeInBits()) {
    case 32:
      if (VecTy.getNumElements() == 2) {
        Opc = AArch64::DUPv2i32lane;
      } else {
        Opc = AArch64::DUPv4i32lane;
        assert(VecTy.getNumElements() == 4);
      }
      break;
    case 64:
      assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
      Opc = AArch64::DUPv2i64lane;
      break;
    }
  } else {
    switch (EltTy.getSizeInBits()) {
    case 32:
      if (VecTy.getNumElements() == 2) {
        Opc = AArch64::DUPv2i32gpr;
      } else {
        Opc = AArch64::DUPv4i32gpr;
        assert(VecTy.getNumElements() == 4);
      }
      break;
    case 64:
      assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
      Opc = AArch64::DUPv2i64gpr;
      break;
    }
  }
  assert(Opc && "Did not compute an opcode for a dup");

  // For FP splats, we need to widen the scalar reg via undef too.
  if (IsFP) {
    MachineInstr *Widen = emitScalarToVector(
        EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
    if (!Widen)
      return false;
    ScalarReg = Widen->getOperand(0).getReg();
  }
  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
  if (IsFP)
    Dup.addImm(0);
  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
  if (TM.getOptLevel() == CodeGenOpt::None)
    return false;
  if (tryOptVectorDup(I))
    return true;
  return false;
}

bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  if (tryOptVectorShuffle(I))
    return true;
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in the future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }
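  // For example: a <2 x s64> shuffle with mask [1, 0] has BytesPerElt == 8
  // and emits the byte indices 8..15 followed by 0..7 -- a TBL index vector
  // that swaps the two 64-bit halves.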

  MachineIRBuilder MIRBuilder(I);

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so we need to convert it to an
    // FPR128 register.
    IndexLoad =
        emitScalarToVector(64, &AArch64::FPR128RegClass,
                           IndexLoad->getOperand(0).getReg(), MIRBuilder);

    auto TBL1 = MIRBuilder.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIRBuilder
            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  auto RegSeq = MIRBuilder
                    .buildInstr(TargetOpcode::REG_SEQUENCE,
                                {&AArch64::QQRegClass}, {Src1Reg})
                    .addImm(AArch64::qsub0)
                    .addUse(Src2Reg)
                    .addImm(AArch64::qsub1);

  auto TBL2 =
      MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
                            {RegSeq, IndexLoad->getOperand(0).getReg()});
  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}

bool AArch64InstructionSelector::selectInsertElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value;

  // Perform the lane insert.
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec = emitScalarToVector(
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    Register DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << ")\n");
      return false;
    }
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectBuildVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIRBuilder);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
                              MIRBuilder);
    DstVec = PrevMI->getOperand(0).getReg();
  }
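  // For example (a sketch), a <4 x s32> G_BUILD_VECTOR becomes roughly:
  //   %v0 = scalar_to_vector(element 0)
  //   %v1 = lane insert of element 1 into %v0, lane 1
  //   %v2 = lane insert of element 2 into %v1, lane 2
  //   %v3 = lane insert of element 3 into %v2, lane 3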

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << ")\n");
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RC);
    Register DstReg = I.getOperand(0).getReg();

    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
/// ID if it exists, and 0 otherwise.
static unsigned findIntrinsicID(MachineInstr &I) {
  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
    return Op.isIntrinsicID();
  });
  if (IntrinOp == I.operands_end())
    return 0;
  return IntrinOp->getIntrinsicID();
}

bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Find the intrinsic ID.
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  // Select the instruction.
  switch (IntrinID) {
  default:
    return false;
  case Intrinsic::trap:
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
    break;
  case Intrinsic::debugtrap:
    if (!STI.isTargetWindows())
      return false;
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
    break;
  }

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIntrinsic(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value;
  } else
    return None;
  return Immed;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
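// Worked example for the ShiftA/ShiftB renderers above (these compute the
// immr/imms fields of the bitfield-move patterns used for shifts): a 32-bit
// left shift by 4 gets immr = (32 - 4) & 0x1f == 28 from selectShiftA_32 and
// imms = 31 - 4 == 27 from selectShiftB_32, matching the standard alias
// "lsl w0, w1, #4" == "ubfm w0, w1, #28, #27".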

/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return None.
///
/// Used by selectArithImmed and selectNegArithImmed.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}
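// For example: 0x123 renders as (imm = 0x123, LSL #0), 0x123000 renders as
// (imm = 0x123, LSL #12), and 0x123456 has bits in both halves, so it
// returns None.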

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return None;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match
  // under those circumstances.
  if (Immed == 0)
    return None;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}
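// For example, comparing a 32-bit value against -5: negating gives 5, which
// fits in 12 bits, so the compare can be selected as "cmn wN, #5" (an adds)
// instead of materializing -5 in a register.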

/// Return true if it is worth folding MI into an extended register. That is,
/// if it's profitable to pull it into the addressing mode of a load or store
/// as a shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasMinSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  if (!OffsetInst)
    return None;

  unsigned OffsetOpc = OffsetInst->getOpcode();
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
    return None;

  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value;

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }
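  // E.g. a G_MUL of the offset by 8 is treated as a shift by Log2_32(8) == 3,
  // which can fold only when the access is also 8 bytes wide (LegalShiftVal
  // == 3), as in "ldr x0, [xBase, xOffset, lsl #3]".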

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend.
    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;

    SignExtend = Ext == AArch64_AM::SXTW;

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = ExtInst->getOperand(1).getReg();
    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, for an
/// 8-byte load like this one, we'll never see anything other than a 3 here
/// (the log2 of the size of the element being loaded).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}


/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // If we have a constant offset, then we probably don't want to match a
  // register offset.
  if (isBaseWithConstantOffset(Root, MRI))
    return None;

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg =
      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

4551360784Sdim
4552327952Sdim/// Select a "register plus unscaled signed 9-bit immediate" address.  This
4553327952Sdim/// should only match when there is an offset that is not valid for a scaled
4554327952Sdim/// immediate addressing mode.  The "Size" argument is the size in bytes of the
4555327952Sdim/// memory reference, which is needed here to know what is valid for a scaled
4556327952Sdim/// immediate.
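///
/// E.g. a 4-byte access at base + 3 cannot use the scaled form, since its
/// immediate must be a multiple of 4; 3 does fit the signed 9-bit range of
/// the unscaled (LDUR/STUR-style) instructions.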
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  int64_t RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
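  // The scaled form encodes a 12-bit unsigned immediate which is implicitly
  // multiplied by the access size, giving a byte-offset range of
  // [0, 4096 * Size).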
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
    return None;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}

/// Select a "register plus scaled unsigned 12-bit immediate" address.  The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
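///
/// E.g. for an 8-byte access, base + 16 is rendered as [base, #2]: the
/// immediate operand holds the byte offset divided by the access size.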
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  // TODO: Handle AArch64_AM::ROR
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
///
/// TODO: Allow shifted register to be rotated in logical instructions.
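///
/// E.g. with "%shift = G_SHL %x, 3" feeding an ADD, we can emit
/// "add x0, x1, x2, lsl #3" instead of a separate shift instruction.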
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  //
  // TODO: Handle AArch64_AM::ROR for logical instructions.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  if (!ShiftInst)
    return None;
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return None;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return None;

  // We have something we can fold. Fold the shift's LHS and RHS into the
  // instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

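  // Shift amounts of NumBits or more are undefined for the generic shifts, so
  // masking with NumBits - 1 is safe and yields an encodable shift amount.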
  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

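/// Return the extend type represented by \p MI, e.g. AArch64_AM::SXTB for a
/// G_SEXT from 8 bits, or AArch64_AM::UXTW for a G_AND with a 0xFFFFFFFF
/// mask. When \p IsLoadStore is true, extends which cannot appear in a
/// load/store addressing mode are rejected. Returns InvalidShiftExtend if
/// \p MI does not represent an extend.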
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::SXTB;
    case 16:
      return AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::UXTB;
    case 16:
      return AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
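  // A constant mask acts like a zero-extend. Note that register-offset loads
  // and stores only support word-sized extends (UXTW/SXTW) on a 32-bit offset
  // register, so the byte and halfword masks are rejected for load/store use.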
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

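/// Ensure that \p ExtReg is a 32-bit register, inserting a subregister copy
/// from the wider register if necessary.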
Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
    Register ExtReg, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  if (MRI.getType(ExtReg).getSizeInBits() == 32)
    return ExtReg;

  // Insert a copy to move ExtReg to GPR32.
  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});

  // Select the copy into a subregister copy.
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
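///
/// E.g. "G_SHL (G_SEXT %w), 2" feeding an ADD can be selected as
/// "add x0, x1, w2, sxtw #2".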
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return None;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return None;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return None;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32-bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (ExtInst && isDef32(*ExtInst))
        return None;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

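/// Custom renderer: add the value of a G_CONSTANT as an immediate operand on
/// the instruction being built.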
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

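/// Custom renderer: encode a 32-bit G_CONSTANT as an AArch64 logical
/// immediate and add the encoding as an immediate operand.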
void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

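/// Custom renderer: encode a 64-bit G_CONSTANT as an AArch64 logical
/// immediate and add the encoding as an immediate operand.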
void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

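/// Return true if \p MI is a load or store whose single memory operand
/// accesses exactly \p NumBytes bytes.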
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

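/// Return true if \p MI produces a 32-bit result which is known to zero the
/// upper half of the corresponding 64-bit register when selected.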
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm