//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to an X86 dag.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <stdint.h>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValues instead of register
  /// numbers for the leaves of the matched tree.
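  /// For example, the memory operand [%rbx + %rcx*4 + 12] would be recorded
  /// here as Base_Reg = %rbx, IndexReg = %rcx, Scale = 4 and Disp = 12.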
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for code size instead of
    /// performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}

    const char *getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // True if the 64-bit immediate fits in a 32-bit sign-extended field.
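    // For example (informal): 0xFFFFFFFF80000000 survives the round trip
    // through int32_t and is accepted, while 0x0000000080000000 does not.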
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N) override;
    SDNode *selectGather(SDNode *N, unsigned Opc);
    SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

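    // Convert the matched X86ISelAddressMode into the five operands of the
    // standard X86 memory reference (Base, Scale, Index, Disp, Segment) that
    // the generated patterns expect.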
    inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
                 ? CurDAG->getTargetFrameIndex(
                       AM.Base_FrameIndex,
                       TLI->getPointerTy(CurDAG->getDataLayout()))
                 : AM.Base_Reg;
      Scale = getI8Imm(AM.Scale, DL);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "Unexpected symbol flags with MCSym.");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions for better code size.
    // At a high level, we'd like to avoid such instructions when
    // we have similar constants used within the same basic block
    // that can be kept in a register.
    //
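    // For example (informal): if the same 32-bit constant is stored to several
    // stack slots in one block, materializing it once in a register is smaller
    // than repeating a 4-byte immediate in every store.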
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!OptForSize)
        return false;

      // Walk all the users of the immediate.
      for (SDNode::use_iterator UI = N->use_begin(),
           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {

        SDNode *User = *UI;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }

        // We don't currently match users that have > 2 operands (except
        // for stores, which are handled above).
        // Those instructions won't match in ISel, for now, and would
        // be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                 OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If we have more than 1 use, then recommend for hoisting.
      return (UseCount > 1);
    }

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, cast to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, cast to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// \brief Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }
  };
}


bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In the case where the increment is 1,
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl    %gs:0, %eax
      // leal    i@NTPOFF(%eax), %eax
      // instead of
      // movl    $i@NTPOFF, %eax
      // addl    %gs:0, %eax
      // if the block also has an access to a second TLS address, this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// Replace the original chain operand of the call with the load's chain
/// operand and move the load below the call's chain operand.
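/// This moves the load next to the call so that it can later be folded into
/// the call instruction.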
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain; this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->optForSize();

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register-indirect calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack.  This is a gross hack.  We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass.  We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}


/// Emit any code that needs to be executed only in the main function.
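/// (On Cygwin/MinGW this currently amounts to a call to __main, which the
/// runtime uses to run static constructors before the body of main executes.)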
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args), 0);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      emitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetLinux())
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (foldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode.  In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

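// Try to fold an ISD::ADD into the addressing mode: first both operands
// together, then with the operands commuted, and finally by giving each
// operand its own register (base + index) if neither folding succeeds.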
bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and mask into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
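// For example (informal), with C1 == 2: (X >> 6) & 0x3fc becomes
// ((X >> 8) & 0xff) << 2, i.e. a byte extract used with an index scale of 4.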
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
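// For example (informal): (X << 1) & 0xfc becomes (X & 0x7e) << 1, and the
// remaining shift by 1 is then absorbed as an index scale of 2.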
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // the isel mechanism requires their node IDs to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that the mask is a contiguous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}
1102296417Sdimbool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1103198090Srdivacky                                              unsigned Depth) {
1104261991Sdim  SDLoc dl(N);
1105198090Srdivacky  DEBUG({
1106202375Srdivacky      dbgs() << "MatchAddress: ";
1107198090Srdivacky      AM.dump();
1108198090Srdivacky    });
1109193323Sed  // Limit recursion.
1110193323Sed  if (Depth > 5)
1111296417Sdim    return matchAddressBase(N, AM);
1112198090Srdivacky
1113195098Sed  // If this is already a %rip relative address, we can only merge immediates
1114195098Sed  // into it.  Instead of handling this in every case, we handle it here.
1115193323Sed  // RIP relative addressing: %rip + 32-bit displacement!
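  // For example (illustrative): when AM already refers to "sym(%rip)", only a
  // constant such as the +8 in "movl sym+8(%rip), %eax" can still be folded;
  // a base or index register cannot be added to a RIP-relative address.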
1116195098Sed  if (AM.isRIPRelative()) {
1117195098Sed    // FIXME: JumpTable and ExternalSymbol addresses currently don't like
1118195098Sed    // displacements.  It isn't very important, but this should be fixed for
1119195098Sed    // consistency.
1120288943Sdim    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1121288943Sdim      return true;
1122198090Srdivacky
1123224145Sdim    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1124296417Sdim      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1125193323Sed        return false;
1126193323Sed    return true;
1127193323Sed  }
1128193323Sed
1129193323Sed  switch (N.getOpcode()) {
1130193323Sed  default: break;
1131288943Sdim  case ISD::LOCAL_RECOVER: {
1132288943Sdim    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1133288943Sdim      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1134288943Sdim        // Use the symbol and don't prefix it.
1135288943Sdim        AM.MCSym = ESNode->getMCSymbol();
1136288943Sdim        return false;
1137288943Sdim      }
1138288943Sdim    break;
1139288943Sdim  }
1140193323Sed  case ISD::Constant: {
1141193323Sed    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1142296417Sdim    if (!foldOffsetIntoAddress(Val, AM))
1143193323Sed      return false;
1144193323Sed    break;
1145193323Sed  }
1146193323Sed
1147193323Sed  case X86ISD::Wrapper:
1148195098Sed  case X86ISD::WrapperRIP:
1149296417Sdim    if (!matchWrapper(N, AM))
1150193323Sed      return false;
1151193323Sed    break;
1152193323Sed
1153193323Sed  case ISD::LOAD:
1154296417Sdim    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1155193323Sed      return false;
1156193323Sed    break;
1157193323Sed
1158193323Sed  case ISD::FrameIndex:
1159224145Sdim    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1160276479Sdim        AM.Base_Reg.getNode() == nullptr &&
1161224145Sdim        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1162193323Sed      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1163207618Srdivacky      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1164193323Sed      return false;
1165193323Sed    }
1166193323Sed    break;
1167193323Sed
1168193323Sed  case ISD::SHL:
1169276479Sdim    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1170193323Sed      break;
1171239462Sdim
1172193323Sed    if (ConstantSDNode
1173193323Sed          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
1174193323Sed      unsigned Val = CN->getZExtValue();
1175198090Srdivacky      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1176198090Srdivacky      // that the base operand remains free for further matching. If
1177198090Srdivacky      // the base doesn't end up getting used, a post-processing step
1178198090Srdivacky      // in matchAddress turns (,x,2) into (x,x), which is cheaper.
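      // For example (illustrative): for a one-use "x << 1" we first record
      // scale 2 with an empty base, i.e. (,%x,2); if nothing else claims the
      // base, the post-processing step rewrites it to (%x,%x), which is
      // cheaper.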
1179193323Sed      if (Val == 1 || Val == 2 || Val == 3) {
1180193323Sed        AM.Scale = 1 << Val;
1181193323Sed        SDValue ShVal = N.getNode()->getOperand(0);
1182193323Sed
1183193323Sed        // Okay, we know that we have a scale by now.  However, if the scaled
1184193323Sed        // value is an add of something and a constant, we can fold the
1185193323Sed        // constant into the disp field here.
1186218893Sdim        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1187193323Sed          AM.IndexReg = ShVal.getNode()->getOperand(0);
1188193323Sed          ConstantSDNode *AddVal =
1189193323Sed            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
1190243830Sdim          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1191296417Sdim          if (!foldOffsetIntoAddress(Disp, AM))
1192224145Sdim            return false;
1193193323Sed        }
1194224145Sdim
1195224145Sdim        AM.IndexReg = ShVal;
1196193323Sed        return false;
1197193323Sed      }
1198249423Sdim    }
1199193323Sed    break;
1200193323Sed
1201234353Sdim  case ISD::SRL: {
1202234353Sdim    // Scale must not be used already.
1203276479Sdim    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1204234353Sdim
1205234353Sdim    SDValue And = N.getOperand(0);
1206234353Sdim    if (And.getOpcode() != ISD::AND) break;
1207234353Sdim    SDValue X = And.getOperand(0);
1208234353Sdim
1209234353Sdim    // We only handle up to 64-bit values here as those are what matter for
1210234353Sdim    // addressing mode optimizations.
1211261991Sdim    if (X.getSimpleValueType().getSizeInBits() > 64) break;
1212234353Sdim
1213234353Sdim    // The mask used for the transform is expected to be post-shift, but we
1214234353Sdim    // found the shift first so just apply the shift to the mask before passing
1215234353Sdim    // it down.
1216234353Sdim    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1217234353Sdim        !isa<ConstantSDNode>(And.getOperand(1)))
1218234353Sdim      break;
1219234353Sdim    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1220234353Sdim
1221234353Sdim    // Try to fold the mask and shift into the scale, and return false if we
1222234353Sdim    // succeed.
1223296417Sdim    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1224234353Sdim      return false;
1225234353Sdim    break;
1226234353Sdim  }
1227234353Sdim
1228193323Sed  case ISD::SMUL_LOHI:
1229193323Sed  case ISD::UMUL_LOHI:
1230193323Sed    // A mul_lohi where we need the low part can be folded as a plain multiply.
1231193323Sed    if (N.getResNo() != 0) break;
1232193323Sed    // FALL THROUGH
1233193323Sed  case ISD::MUL:
1234193323Sed  case X86ISD::MUL_IMM:
1235193323Sed    // X*[3,5,9] -> X+X*[2,4,8]
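    // For example (illustrative): x*9 is matched as base = x, index = x,
    // scale = 8, which an LEA can compute as "lea (%rax,%rax,8), %rcx" when
    // x lives in %rax.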
1236193323Sed    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1237276479Sdim        AM.Base_Reg.getNode() == nullptr &&
1238276479Sdim        AM.IndexReg.getNode() == nullptr) {
1239193323Sed      if (ConstantSDNode
1240193323Sed            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
1241193323Sed        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1242193323Sed            CN->getZExtValue() == 9) {
1243193323Sed          AM.Scale = unsigned(CN->getZExtValue())-1;
1244193323Sed
1245193323Sed          SDValue MulVal = N.getNode()->getOperand(0);
1246193323Sed          SDValue Reg;
1247193323Sed
1248193323Sed          // Okay, we know that we have a scale by now.  However, if the scaled
1249193323Sed          // value is an add of something and a constant, we can fold the
1250193323Sed          // constant into the disp field here.
1251193323Sed          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1252193323Sed              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
1253193323Sed            Reg = MulVal.getNode()->getOperand(0);
1254193323Sed            ConstantSDNode *AddVal =
1255193323Sed              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
1256224145Sdim            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1257296417Sdim            if (foldOffsetIntoAddress(Disp, AM))
1258193323Sed              Reg = N.getNode()->getOperand(0);
1259193323Sed          } else {
1260193323Sed            Reg = N.getNode()->getOperand(0);
1261193323Sed          }
1262193323Sed
1263207618Srdivacky          AM.IndexReg = AM.Base_Reg = Reg;
1264193323Sed          return false;
1265193323Sed        }
1266193323Sed    }
1267193323Sed    break;
1268193323Sed
1269193323Sed  case ISD::SUB: {
1270193323Sed    // Given A-B, if A can be completely folded into the address and
1271193323Sed    // the index field with the index field unused, use -B as the index.
1272193323Sed    // This is a win if A has multiple parts that can be folded into
1273193323Sed    // the address. Also, this saves a mov if the base register has
1274193323Sed    // other uses, since it avoids a two-address sub instruction; however,
1275193323Sed    // it costs an additional mov if the index register has other uses.
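    // For example (illustrative): for an address of the form "A - B" where A
    // folds entirely into the base/displacement, we materialize "0 - B" (which
    // typically selects to a neg) and use the result as the index with
    // scale 1.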
1276193323Sed
1277210299Sed    // Add an artificial use to this node so that we can keep track of
1278210299Sed    // it if it gets CSE'd with a different node.
1279210299Sed    HandleSDNode Handle(N);
1280210299Sed
1281193323Sed    // Test if the LHS of the sub can be folded.
1282193323Sed    X86ISelAddressMode Backup = AM;
1283296417Sdim    if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
1284193323Sed      AM = Backup;
1285193323Sed      break;
1286193323Sed    }
1287193323Sed    // Test if the index field is free for use.
1288195098Sed    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1289193323Sed      AM = Backup;
1290193323Sed      break;
1291193323Sed    }
1292205407Srdivacky
1293193323Sed    int Cost = 0;
1294210299Sed    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
1295193323Sed    // If the RHS involves a register with multiple uses, this
1296193323Sed    // transformation incurs an extra mov, due to the neg instruction
1297193323Sed    // clobbering its operand.
1298193323Sed    if (!RHS.getNode()->hasOneUse() ||
1299193323Sed        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1300193323Sed        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1301193323Sed        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1302193323Sed        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1303193323Sed         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
1304193323Sed      ++Cost;
1305193323Sed    // If the base is a register with multiple uses, this
1306193323Sed    // transformation may save a mov.
1307193323Sed    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
1308207618Srdivacky         AM.Base_Reg.getNode() &&
1309207618Srdivacky         !AM.Base_Reg.getNode()->hasOneUse()) ||
1310193323Sed        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1311193323Sed      --Cost;
1312193323Sed    // If the folded LHS was interesting, this transformation saves
1313193323Sed    // address arithmetic.
1314193323Sed    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1315193323Sed        ((AM.Disp != 0) && (Backup.Disp == 0)) +
1316193323Sed        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1317193323Sed      --Cost;
1318193323Sed    // If it doesn't look like it may be an overall win, don't do it.
1319193323Sed    if (Cost >= 0) {
1320193323Sed      AM = Backup;
1321193323Sed      break;
1322193323Sed    }
1323193323Sed
1324193323Sed    // Ok, the transformation is legal and appears profitable. Go for it.
1325288943Sdim    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1326193323Sed    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1327193323Sed    AM.IndexReg = Neg;
1328193323Sed    AM.Scale = 1;
1329193323Sed
1330193323Sed    // Insert the new nodes into the topological ordering.
1331296417Sdim    insertDAGNode(*CurDAG, N, Zero);
1332296417Sdim    insertDAGNode(*CurDAG, N, Neg);
1333193323Sed    return false;
1334193323Sed  }
1335193323Sed
1336296417Sdim  case ISD::ADD:
1337296417Sdim    if (!matchAdd(N, AM, Depth))
1338210299Sed      return false;
1339193323Sed    break;
1340193323Sed
1341193323Sed  case ISD::OR:
1342296417Sdim    // We want to look through a transform in InstCombine and DAGCombiner that
1343296417Sdim    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1344296417Sdim    // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1345296417Sdim    // An 'lea' can then be used to match the shift (multiply) and add:
1346296417Sdim    // and $1, %esi
1347296417Sdim    // lea (%rsi, %rdi, 8), %rax
1348296417Sdim    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1349296417Sdim        !matchAdd(N, AM, Depth))
1350296417Sdim      return false;
1351193323Sed    break;
1352239462Sdim
1353193323Sed  case ISD::AND: {
1354193323Sed    // Perform some heroic transforms on an and of a constant-count shift
1355193323Sed    // with a constant to enable use of the scaled offset field.
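    // For example (illustrative): (and (shl X, 2), 0x3FC) is the same value as
    // ((and X, 0xFF) << 2), so it can be matched with (and X, 0xFF) as the
    // index register and a scale of 4; the helpers below try this and related
    // rewrites.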
1356193323Sed
1357193323Sed    // Scale must not be used already.
1358276479Sdim    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1359193323Sed
1360234353Sdim    SDValue Shift = N.getOperand(0);
1361234353Sdim    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1362193323Sed    SDValue X = Shift.getOperand(0);
1363193323Sed
1364234353Sdim    // We only handle up to 64-bit values here as those are what matter for
1365234353Sdim    // addressing mode optimizations.
1366261991Sdim    if (X.getSimpleValueType().getSizeInBits() > 64) break;
1367193323Sed
1368234353Sdim    if (!isa<ConstantSDNode>(N.getOperand(1)))
1369234353Sdim      break;
1370234353Sdim    uint64_t Mask = N.getConstantOperandVal(1);
1371193323Sed
1372234353Sdim    // Try to fold the mask and shift into an extract and scale.
1373296417Sdim    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1374234353Sdim      return false;
1375193323Sed
1376234353Sdim    // Try to fold the mask and shift directly into the scale.
1377296417Sdim    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1378234353Sdim      return false;
1379193323Sed
1380234353Sdim    // Try to swap the mask and shift to place shifts which can be done as
1381234353Sdim    // a scale on the outside of the mask.
1382296417Sdim    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1383234353Sdim      return false;
1384234353Sdim    break;
1385193323Sed  }
1386193323Sed  }
1387193323Sed
1388296417Sdim  return matchAddressBase(N, AM);
1389193323Sed}
1390193323Sed
1391296417Sdim/// Helper for matchAddress. Add the specified node to the
1392193323Sed/// specified addressing mode without any further recursion.
1393296417Sdimbool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1394193323Sed  // Is the base register already occupied?
1395207618Srdivacky  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1396193323Sed    // If so, check to see if the scale index register is set.
1397276479Sdim    if (!AM.IndexReg.getNode()) {
1398193323Sed      AM.IndexReg = N;
1399193323Sed      AM.Scale = 1;
1400193323Sed      return false;
1401193323Sed    }
1402193323Sed
1403193323Sed    // Otherwise, we cannot select it.
1404193323Sed    return true;
1405193323Sed  }
1406193323Sed
1407193323Sed  // Default, generate it as a register.
1408193323Sed  AM.BaseType = X86ISelAddressMode::RegBase;
1409207618Srdivacky  AM.Base_Reg = N;
1410193323Sed  return false;
1411193323Sed}
1412193323Sed
1413296417Sdimbool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1414288943Sdim                                      SDValue &Scale, SDValue &Index,
1415288943Sdim                                      SDValue &Disp, SDValue &Segment) {
1416288943Sdim
1417288943Sdim  MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
1418288943Sdim  if (!Mgs)
1419288943Sdim    return false;
1420288943Sdim  X86ISelAddressMode AM;
1421288943Sdim  unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
1422288943Sdim  // AddrSpace 256 -> GS, 257 -> FS.
1423288943Sdim  if (AddrSpace == 256)
1424288943Sdim    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1425288943Sdim  if (AddrSpace == 257)
1426288943Sdim    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1427288943Sdim
1428288943Sdim  SDLoc DL(N);
1429288943Sdim  Base = Mgs->getBasePtr();
1430288943Sdim  Index = Mgs->getIndex();
1431288943Sdim  unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits();
1432288943Sdim  Scale = getI8Imm(ScalarSize/8, DL);
1433288943Sdim
1434288943Sdim  // If Base is 0, the whole address is in the index and the scale is 1.
1435288943Sdim  if (isa<ConstantSDNode>(Base)) {
1436296417Sdim    assert(cast<ConstantSDNode>(Base)->isNullValue() &&
1437288943Sdim           "Unexpected base in gather/scatter");
1438288943Sdim    Scale = getI8Imm(1, DL);
1439288943Sdim    Base = CurDAG->getRegister(0, MVT::i32);
1440288943Sdim  }
1441288943Sdim  if (AM.Segment.getNode())
1442288943Sdim    Segment = AM.Segment;
1443288943Sdim  else
1444288943Sdim    Segment = CurDAG->getRegister(0, MVT::i32);
1445288943Sdim  Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1446288943Sdim  return true;
1447288943Sdim}
1448288943Sdim
1449296417Sdim/// Returns true if it is able to pattern match an addressing mode.
1450193323Sed/// The operands which make up the maximal addressing mode it can match are
1451193323Sed/// returned by reference.
1452218893Sdim///
1453218893Sdim/// Parent is the parent node of the addr operand that is being matched.  It
1454218893Sdim/// is always a load, store, atomic node, or null.  It is only null when
1455218893Sdim/// checking memory operands for inline asm nodes.
1456296417Sdimbool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1457193323Sed                                 SDValue &Scale, SDValue &Index,
1458193323Sed                                 SDValue &Disp, SDValue &Segment) {
1459193323Sed  X86ISelAddressMode AM;
1460239462Sdim
1461218893Sdim  if (Parent &&
1462218893Sdim      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1463218893Sdim      // that are not a MemSDNode, and thus don't have proper addrspace info.
1464218893Sdim      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1465218893Sdim      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1466243830Sdim      Parent->getOpcode() != X86ISD::TLSCALL && // FIXME
1467243830Sdim      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1468243830Sdim      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1469218893Sdim    unsigned AddrSpace =
1470218893Sdim      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1471218893Sdim    // AddrSpace 256 -> GS, 257 -> FS.
1472218893Sdim    if (AddrSpace == 256)
1473218893Sdim      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1474218893Sdim    if (AddrSpace == 257)
1475218893Sdim      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1476218893Sdim  }
1477239462Sdim
1478296417Sdim  if (matchAddress(N, AM))
1479193323Sed    return false;
1480193323Sed
1481261991Sdim  MVT VT = N.getSimpleValueType();
1482193323Sed  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1483207618Srdivacky    if (!AM.Base_Reg.getNode())
1484207618Srdivacky      AM.Base_Reg = CurDAG->getRegister(0, VT);
1485193323Sed  }
1486193323Sed
1487193323Sed  if (!AM.IndexReg.getNode())
1488193323Sed    AM.IndexReg = CurDAG->getRegister(0, VT);
1489193323Sed
1490288943Sdim  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1491193323Sed  return true;
1492193323Sed}
1493193323Sed
1494296417Sdim/// Match a scalar SSE load. In particular, we want to match a load whose top
1495296417Sdim/// elements are either undef or zeros. The load flavor is derived from the
1496296417Sdim/// type of N, which is either v4f32 or v2f64.
1497204642Srdivacky///
1498204642Srdivacky/// We also return:
1499204642Srdivacky///   PatternNodeWithChain: this is the matched node that has a chain input and
1500204642Srdivacky///   output.
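///
/// For example (illustrative): both (scalar_to_vector (load addr)) and
/// (X86ISD::VZEXT_MOVL (scalar_to_vector (load addr))) can be matched here so
/// that the load folds into the memory operand of the scalar SSE instruction.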
1501296417Sdimbool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
1502193323Sed                                          SDValue N, SDValue &Base,
1503193323Sed                                          SDValue &Scale, SDValue &Index,
1504193323Sed                                          SDValue &Disp, SDValue &Segment,
1505204642Srdivacky                                          SDValue &PatternNodeWithChain) {
1506193323Sed  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1507204642Srdivacky    PatternNodeWithChain = N.getOperand(0);
1508204642Srdivacky    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1509204642Srdivacky        PatternNodeWithChain.hasOneUse() &&
1510204642Srdivacky        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
1511207618Srdivacky        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
1512204642Srdivacky      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1513296417Sdim      if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1514193323Sed        return false;
1515193323Sed      return true;
1516193323Sed    }
1517193323Sed  }
1518193323Sed
1519193323Sed  // Also handle the case where we explicitly require zeros in the top
1520193323Sed  // elements.  This is a vector shuffle from the zero vector.
1521193323Sed  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1522193323Sed      // Check to see if the top elements are all zeros (or bitcast of zeros).
1523239462Sdim      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1524193323Sed      N.getOperand(0).getNode()->hasOneUse() &&
1525193323Sed      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
1526204642Srdivacky      N.getOperand(0).getOperand(0).hasOneUse() &&
1527204642Srdivacky      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
1528207618Srdivacky      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
1529193323Sed    // Okay, this is a zero extending load.  Fold it.
1530193323Sed    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
1531296417Sdim    if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1532193323Sed      return false;
1533204642Srdivacky    PatternNodeWithChain = SDValue(LD, 0);
1534193323Sed    return true;
1535193323Sed  }
1536193323Sed  return false;
1537193323Sed}
1538193323Sed
1539193323Sed
1540296417Sdimbool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1541261991Sdim  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1542261991Sdim    uint64_t ImmVal = CN->getZExtValue();
1543261991Sdim    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
1544261991Sdim      return false;
1545261991Sdim
1546288943Sdim    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1547261991Sdim    return true;
1548261991Sdim  }
1549261991Sdim
1550261991Sdim  // In static codegen with small code model, we can get the address of a label
1551261991Sdim  // into a register with 'movl'. TableGen has already made sure we're looking
1552261991Sdim  // at a label of some kind.
1553261991Sdim  assert(N->getOpcode() == X86ISD::Wrapper &&
1554261991Sdim         "Unexpected node type for MOV32ri64");
1555261991Sdim  N = N.getOperand(0);
1556261991Sdim
1557261991Sdim  if (N->getOpcode() != ISD::TargetConstantPool &&
1558261991Sdim      N->getOpcode() != ISD::TargetJumpTable &&
1559261991Sdim      N->getOpcode() != ISD::TargetGlobalAddress &&
1560261991Sdim      N->getOpcode() != ISD::TargetExternalSymbol &&
1561288943Sdim      N->getOpcode() != ISD::MCSymbol &&
1562261991Sdim      N->getOpcode() != ISD::TargetBlockAddress)
1563261991Sdim    return false;
1564261991Sdim
1565261991Sdim  Imm = N;
1566261991Sdim  return TM.getCodeModel() == CodeModel::Small;
1567261991Sdim}
1568261991Sdim
1569296417Sdimbool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1570261991Sdim                                         SDValue &Scale, SDValue &Index,
1571261991Sdim                                         SDValue &Disp, SDValue &Segment) {
1572296417Sdim  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1573261991Sdim    return false;
1574261991Sdim
1575261991Sdim  SDLoc DL(N);
1576261991Sdim  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1577261991Sdim  if (RN && RN->getReg() == 0)
1578261991Sdim    Base = CurDAG->getRegister(0, MVT::i64);
1579280031Sdim  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
1580261991Sdim    // Base could already be %rip, particularly in the x32 ABI.
1581261991Sdim    Base = SDValue(CurDAG->getMachineNode(
1582261991Sdim                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1583288943Sdim                       CurDAG->getTargetConstant(0, DL, MVT::i64),
1584261991Sdim                       Base,
1585288943Sdim                       CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1586261991Sdim                   0);
1587261991Sdim  }
1588261991Sdim
1589261991Sdim  RN = dyn_cast<RegisterSDNode>(Index);
1590261991Sdim  if (RN && RN->getReg() == 0)
1591261991Sdim    Index = CurDAG->getRegister(0, MVT::i64);
1592261991Sdim  else {
1593261991Sdim    assert(Index.getValueType() == MVT::i32 &&
1594261991Sdim           "Expect to be extending 32-bit registers for use in LEA");
1595261991Sdim    Index = SDValue(CurDAG->getMachineNode(
1596261991Sdim                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1597288943Sdim                        CurDAG->getTargetConstant(0, DL, MVT::i64),
1598261991Sdim                        Index,
1599288943Sdim                        CurDAG->getTargetConstant(X86::sub_32bit, DL,
1600288943Sdim                                                  MVT::i32)),
1601261991Sdim                    0);
1602261991Sdim  }
1603261991Sdim
1604261991Sdim  return true;
1605261991Sdim}
1606261991Sdim
1607296417Sdim/// Calls matchAddress and determines if the maximal addressing
1608193323Sed/// mode it matches can be cost-effectively emitted as an LEA instruction.
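///
/// For example (illustrative): an address such as "8 + %base + 4*%index" is
/// usually worth an LEA ("leaq 8(%base,%index,4), %dst"), whereas a plain
/// single-register address is not, which is what the complexity heuristic
/// below tries to capture.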
1609296417Sdimbool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1610193323Sed                                    SDValue &Base, SDValue &Scale,
1611210299Sed                                    SDValue &Index, SDValue &Disp,
1612210299Sed                                    SDValue &Segment) {
1613193323Sed  X86ISelAddressMode AM;
1614193323Sed
1615193323Sed  // Set AM.Segment to prevent matchAddress from using one. LEA doesn't support
1616193323Sed  // segments.
1617193323Sed  SDValue Copy = AM.Segment;
1618193323Sed  SDValue T = CurDAG->getRegister(0, MVT::i32);
1619193323Sed  AM.Segment = T;
1620296417Sdim  if (matchAddress(N, AM))
1621193323Sed    return false;
1622193323Sed  assert(T == AM.Segment);
1623193323Sed  AM.Segment = Copy;
1624193323Sed
1625261991Sdim  MVT VT = N.getSimpleValueType();
1626193323Sed  unsigned Complexity = 0;
1627193323Sed  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1628207618Srdivacky    if (AM.Base_Reg.getNode())
1629193323Sed      Complexity = 1;
1630193323Sed    else
1631207618Srdivacky      AM.Base_Reg = CurDAG->getRegister(0, VT);
1632193323Sed  } else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1633193323Sed    Complexity = 4;
1634193323Sed
1635193323Sed  if (AM.IndexReg.getNode())
1636193323Sed    Complexity++;
1637193323Sed  else
1638193323Sed    AM.IndexReg = CurDAG->getRegister(0, VT);
1639193323Sed
1640193323Sed  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1641193323Sed  // a simple shift.
1642193323Sed  if (AM.Scale > 1)
1643193323Sed    Complexity++;
1644193323Sed
1645193323Sed  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1646296417Sdim  // to a LEA. This is determined with some experimentation but is by no means
1647193323Sed  // optimal (especially for code size consideration). LEA is nice because of
1648193323Sed  // its three-address nature. Tweak the cost function again when we can run
1649193323Sed  // convertToThreeAddress() at register allocation time.
1650193323Sed  if (AM.hasSymbolicDisplacement()) {
1651296417Sdim    // For X86-64, always use LEA to materialize RIP-relative addresses.
1652193323Sed    if (Subtarget->is64Bit())
1653193323Sed      Complexity = 4;
1654193323Sed    else
1655193323Sed      Complexity += 2;
1656193323Sed  }
1657193323Sed
1658207618Srdivacky  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1659193323Sed    Complexity++;
1660193323Sed
1661198090Srdivacky  // If it isn't worth using an LEA, reject it.
1662198090Srdivacky  if (Complexity <= 2)
1663198090Srdivacky    return false;
1664239462Sdim
1665288943Sdim  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1666198090Srdivacky  return true;
1667193323Sed}
1668193323Sed
1669296417Sdim/// This is only run on TargetGlobalTLSAddress nodes.
1670296417Sdimbool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1671194612Sed                                        SDValue &Scale, SDValue &Index,
1672210299Sed                                        SDValue &Disp, SDValue &Segment) {
1673194612Sed  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1674194612Sed  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1675239462Sdim
1676194612Sed  X86ISelAddressMode AM;
1677194612Sed  AM.GV = GA->getGlobal();
1678194612Sed  AM.Disp += GA->getOffset();
1679207618Srdivacky  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1680195098Sed  AM.SymbolFlags = GA->getTargetFlags();
1681195098Sed
1682194612Sed  if (N.getValueType() == MVT::i32) {
1683194612Sed    AM.Scale = 1;
1684194612Sed    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1685194612Sed  } else {
1686194612Sed    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1687194612Sed  }
1688239462Sdim
1689288943Sdim  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1690194612Sed  return true;
1691194612Sed}
1692194612Sed
1693194612Sed
1694296417Sdimbool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N,
1695193323Sed                                  SDValue &Base, SDValue &Scale,
1696193323Sed                                  SDValue &Index, SDValue &Disp,
1697193323Sed                                  SDValue &Segment) {
1698204642Srdivacky  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1699204642Srdivacky      !IsProfitableToFold(N, P, P) ||
1700207618Srdivacky      !IsLegalToFold(N, P, P, OptLevel))
1701204642Srdivacky    return false;
1702239462Sdim
1703296417Sdim  return selectAddr(N.getNode(),
1704218893Sdim                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
1705193323Sed}
1706193323Sed
1707296417Sdim/// Return an SDNode that returns the value of the global base register.
1708296417Sdim/// Output instructions required to initialize the global base register,
1709296417Sdim/// if necessary.
1710193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1711193399Sed  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1712288943Sdim  auto &DL = MF->getDataLayout();
1713288943Sdim  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
1714193323Sed}
1715193323Sed
1716243830Sdim/// Atomic opcode table
1717243830Sdim///
1718223017Sdimenum AtomicOpc {
1719243830Sdim  ADD,
1720243830Sdim  SUB,
1721243830Sdim  INC,
1722243830Sdim  DEC,
1723223017Sdim  OR,
1724223017Sdim  AND,
1725223017Sdim  XOR,
1726223017Sdim  AtomicOpcEnd
1727223017Sdim};
1728223017Sdim
1729223017Sdimenum AtomicSz {
1730223017Sdim  ConstantI8,
1731223017Sdim  I8,
1732223017Sdim  SextConstantI16,
1733223017Sdim  ConstantI16,
1734223017Sdim  I16,
1735223017Sdim  SextConstantI32,
1736223017Sdim  ConstantI32,
1737223017Sdim  I32,
1738223017Sdim  SextConstantI64,
1739223017Sdim  ConstantI64,
1740223017Sdim  I64,
1741223017Sdim  AtomicSzEnd
1742223017Sdim};
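
// For example (illustrative): AtomicOpcTbl[ADD][SextConstantI32] yields
// X86::LOCK_ADD32mi8 (32-bit memory operand, sign-extended 8-bit immediate),
// while AtomicOpcTbl[ADD][I32] yields the register form X86::LOCK_ADD32mr.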
1743223017Sdim
1744234353Sdimstatic const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
1745223017Sdim  {
1746243830Sdim    X86::LOCK_ADD8mi,
1747243830Sdim    X86::LOCK_ADD8mr,
1748243830Sdim    X86::LOCK_ADD16mi8,
1749243830Sdim    X86::LOCK_ADD16mi,
1750243830Sdim    X86::LOCK_ADD16mr,
1751243830Sdim    X86::LOCK_ADD32mi8,
1752243830Sdim    X86::LOCK_ADD32mi,
1753243830Sdim    X86::LOCK_ADD32mr,
1754243830Sdim    X86::LOCK_ADD64mi8,
1755243830Sdim    X86::LOCK_ADD64mi32,
1756243830Sdim    X86::LOCK_ADD64mr,
1757243830Sdim  },
1758243830Sdim  {
1759243830Sdim    X86::LOCK_SUB8mi,
1760243830Sdim    X86::LOCK_SUB8mr,
1761243830Sdim    X86::LOCK_SUB16mi8,
1762243830Sdim    X86::LOCK_SUB16mi,
1763243830Sdim    X86::LOCK_SUB16mr,
1764243830Sdim    X86::LOCK_SUB32mi8,
1765243830Sdim    X86::LOCK_SUB32mi,
1766243830Sdim    X86::LOCK_SUB32mr,
1767243830Sdim    X86::LOCK_SUB64mi8,
1768243830Sdim    X86::LOCK_SUB64mi32,
1769243830Sdim    X86::LOCK_SUB64mr,
1770243830Sdim  },
1771243830Sdim  {
1772243830Sdim    0,
1773243830Sdim    X86::LOCK_INC8m,
1774243830Sdim    0,
1775243830Sdim    0,
1776243830Sdim    X86::LOCK_INC16m,
1777243830Sdim    0,
1778243830Sdim    0,
1779243830Sdim    X86::LOCK_INC32m,
1780243830Sdim    0,
1781243830Sdim    0,
1782243830Sdim    X86::LOCK_INC64m,
1783243830Sdim  },
1784243830Sdim  {
1785243830Sdim    0,
1786243830Sdim    X86::LOCK_DEC8m,
1787243830Sdim    0,
1788243830Sdim    0,
1789243830Sdim    X86::LOCK_DEC16m,
1790243830Sdim    0,
1791243830Sdim    0,
1792243830Sdim    X86::LOCK_DEC32m,
1793243830Sdim    0,
1794243830Sdim    0,
1795243830Sdim    X86::LOCK_DEC64m,
1796243830Sdim  },
1797243830Sdim  {
1798223017Sdim    X86::LOCK_OR8mi,
1799223017Sdim    X86::LOCK_OR8mr,
1800223017Sdim    X86::LOCK_OR16mi8,
1801223017Sdim    X86::LOCK_OR16mi,
1802223017Sdim    X86::LOCK_OR16mr,
1803223017Sdim    X86::LOCK_OR32mi8,
1804223017Sdim    X86::LOCK_OR32mi,
1805223017Sdim    X86::LOCK_OR32mr,
1806223017Sdim    X86::LOCK_OR64mi8,
1807223017Sdim    X86::LOCK_OR64mi32,
1808243830Sdim    X86::LOCK_OR64mr,
1809223017Sdim  },
1810223017Sdim  {
1811223017Sdim    X86::LOCK_AND8mi,
1812223017Sdim    X86::LOCK_AND8mr,
1813223017Sdim    X86::LOCK_AND16mi8,
1814223017Sdim    X86::LOCK_AND16mi,
1815223017Sdim    X86::LOCK_AND16mr,
1816223017Sdim    X86::LOCK_AND32mi8,
1817223017Sdim    X86::LOCK_AND32mi,
1818223017Sdim    X86::LOCK_AND32mr,
1819223017Sdim    X86::LOCK_AND64mi8,
1820223017Sdim    X86::LOCK_AND64mi32,
1821243830Sdim    X86::LOCK_AND64mr,
1822223017Sdim  },
1823223017Sdim  {
1824223017Sdim    X86::LOCK_XOR8mi,
1825223017Sdim    X86::LOCK_XOR8mr,
1826223017Sdim    X86::LOCK_XOR16mi8,
1827223017Sdim    X86::LOCK_XOR16mi,
1828223017Sdim    X86::LOCK_XOR16mr,
1829223017Sdim    X86::LOCK_XOR32mi8,
1830223017Sdim    X86::LOCK_XOR32mi,
1831223017Sdim    X86::LOCK_XOR32mr,
1832223017Sdim    X86::LOCK_XOR64mi8,
1833223017Sdim    X86::LOCK_XOR64mi32,
1834243830Sdim    X86::LOCK_XOR64mr,
1835223017Sdim  }
1836223017Sdim};
1837223017Sdim
1838243830Sdim// Return the target constant operand for atomic-load-op and do simple
1839243830Sdim// translations, such as from atomic-load-add to lock-sub. The return value is
1840243830Sdim// one of the following three cases:
1841243830Sdim// + a target constant, if the operand can be encoded as a target constant.
1842243830Sdim// + empty, if the operand is no longer needed with the newly selected op.
1843243830Sdim// + non-empty, otherwise.
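//
// For example (illustrative): atomic-load-add of constant -1 is translated to
// Op = DEC with an empty return (no constant operand remains); atomic-load-add
// of constant -8 is translated to Op = SUB with a target constant of 8; and
// atomic-load-add of (sub 0, x) is translated to Op = SUB returning x.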
1844243830Sdimstatic SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
1845261991Sdim                                                SDLoc dl,
1846261991Sdim                                                enum AtomicOpc &Op, MVT NVT,
1847280031Sdim                                                SDValue Val,
1848280031Sdim                                                const X86Subtarget *Subtarget) {
1849243830Sdim  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
1850243830Sdim    int64_t CNVal = CN->getSExtValue();
1851243830Sdim    // Quit if not 32-bit imm.
1852243830Sdim    if ((int32_t)CNVal != CNVal)
1853243830Sdim      return Val;
1854280031Sdim    // If the value is INT32_MIN, skip the ADD->SUB translation below: negating
1855280031Sdim    // it would overflow, producing an immediate that does not fit in the 32
1856280031Sdim    // bits available for an immediate operand to sub. It still fits in 32 bits
1857280031Sdim    // for the add (since it is not negated), so return it as a target constant.
1858280031Sdim    if (CNVal == INT32_MIN)
1859288943Sdim      return CurDAG->getTargetConstant(CNVal, dl, NVT);
1860243830Sdim    // For atomic-load-add, we could do some optimizations.
1861243830Sdim    if (Op == ADD) {
1862243830Sdim      // Translate to INC/DEC if ADD by 1 or -1.
1863280031Sdim      if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
1864243830Sdim        Op = (CNVal == 1) ? INC : DEC;
1865243830Sdim        // No more constant operand after being translated into INC/DEC.
1866243830Sdim        return SDValue();
1867243830Sdim      }
1868243830Sdim      // Translate to SUB if ADD by negative value.
1869243830Sdim      if (CNVal < 0) {
1870243830Sdim        Op = SUB;
1871243830Sdim        CNVal = -CNVal;
1872243830Sdim      }
1873243830Sdim    }
1874288943Sdim    return CurDAG->getTargetConstant(CNVal, dl, NVT);
1875243830Sdim  }
1876243830Sdim
1877243830Sdim  // If the value operand is single-used, try to optimize it.
1878243830Sdim  if (Op == ADD && Val.hasOneUse()) {
1879243830Sdim    // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
1880243830Sdim    if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
1881243830Sdim      Op = SUB;
1882243830Sdim      return Val.getOperand(1);
1883243830Sdim    }
1884243830Sdim    // A special case for i16, which needs truncating as, in most cases, it's
1885243830Sdim    // promoted to i32. We will translate
1886243830Sdim    // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
1887243830Sdim    if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
1888243830Sdim        Val.getOperand(0).getOpcode() == ISD::SUB &&
1889243830Sdim        X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
1890243830Sdim      Op = SUB;
1891243830Sdim      Val = Val.getOperand(0);
1892243830Sdim      return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
1893243830Sdim                                            Val.getOperand(1));
1894243830Sdim    }
1895243830Sdim  }
1896243830Sdim
1897243830Sdim  return Val;
1898243830Sdim}
1899243830Sdim
1900296417SdimSDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) {
1901223017Sdim  if (Node->hasAnyUseOfValue(0))
1902276479Sdim    return nullptr;
1903239462Sdim
1904261991Sdim  SDLoc dl(Node);
1905243830Sdim
1906223017Sdim  // Optimize common patterns for __sync_or_and_fetch and similar arith
1907223017Sdim  // operations where the result is not used. This allows us to use the "lock"
1908223017Sdim  // version of the arithmetic instruction.
1909223017Sdim  SDValue Chain = Node->getOperand(0);
1910223017Sdim  SDValue Ptr = Node->getOperand(1);
1911223017Sdim  SDValue Val = Node->getOperand(2);
1912280031Sdim  SDValue Base, Scale, Index, Disp, Segment;
1913296417Sdim  if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
1914276479Sdim    return nullptr;
1915223017Sdim
1916223017Sdim  // Which index into the table.
1917223017Sdim  enum AtomicOpc Op;
1918223017Sdim  switch (Node->getOpcode()) {
1919243830Sdim    default:
1920276479Sdim      return nullptr;
1921223017Sdim    case ISD::ATOMIC_LOAD_OR:
1922223017Sdim      Op = OR;
1923223017Sdim      break;
1924223017Sdim    case ISD::ATOMIC_LOAD_AND:
1925223017Sdim      Op = AND;
1926223017Sdim      break;
1927223017Sdim    case ISD::ATOMIC_LOAD_XOR:
1928223017Sdim      Op = XOR;
1929223017Sdim      break;
1930243830Sdim    case ISD::ATOMIC_LOAD_ADD:
1931243830Sdim      Op = ADD;
1932243830Sdim      break;
1933223017Sdim  }
1934251662Sdim
1935280031Sdim  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
1936243830Sdim  bool isUnOp = !Val.getNode();
1937243830Sdim  bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
1938239462Sdim
1939223017Sdim  unsigned Opc = 0;
1940261991Sdim  switch (NVT.SimpleTy) {
1941276479Sdim    default: return nullptr;
1942223017Sdim    case MVT::i8:
1943223017Sdim      if (isCN)
1944223017Sdim        Opc = AtomicOpcTbl[Op][ConstantI8];
1945223017Sdim      else
1946223017Sdim        Opc = AtomicOpcTbl[Op][I8];
1947223017Sdim      break;
1948223017Sdim    case MVT::i16:
1949223017Sdim      if (isCN) {
1950223017Sdim        if (immSext8(Val.getNode()))
1951223017Sdim          Opc = AtomicOpcTbl[Op][SextConstantI16];
1952223017Sdim        else
1953223017Sdim          Opc = AtomicOpcTbl[Op][ConstantI16];
1954223017Sdim      } else
1955223017Sdim        Opc = AtomicOpcTbl[Op][I16];
1956223017Sdim      break;
1957223017Sdim    case MVT::i32:
1958223017Sdim      if (isCN) {
1959223017Sdim        if (immSext8(Val.getNode()))
1960223017Sdim          Opc = AtomicOpcTbl[Op][SextConstantI32];
1961223017Sdim        else
1962223017Sdim          Opc = AtomicOpcTbl[Op][ConstantI32];
1963223017Sdim      } else
1964223017Sdim        Opc = AtomicOpcTbl[Op][I32];
1965223017Sdim      break;
1966223017Sdim    case MVT::i64:
1967223017Sdim      if (isCN) {
1968223017Sdim        if (immSext8(Val.getNode()))
1969223017Sdim          Opc = AtomicOpcTbl[Op][SextConstantI64];
1970223017Sdim        else if (i64immSExt32(Val.getNode()))
1971223017Sdim          Opc = AtomicOpcTbl[Op][ConstantI64];
1972280031Sdim        else
1973280031Sdim          llvm_unreachable("True 64-bit constant in selectAtomicLoadArith");
1974280031Sdim      } else
1975280031Sdim        Opc = AtomicOpcTbl[Op][I64];
1976223017Sdim      break;
1977223017Sdim  }
1978239462Sdim
1979224145Sdim  assert(Opc != 0 && "Invalid arith lock transform!");
1980224145Sdim
1981280031Sdim  // Building the new node.
1982243830Sdim  SDValue Ret;
1983243830Sdim  if (isUnOp) {
1984280031Sdim    SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain };
1985251662Sdim    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
1986243830Sdim  } else {
1987280031Sdim    SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain };
1988251662Sdim    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
1989243830Sdim  }
1990280031Sdim
1991280031Sdim  // Copying the MachineMemOperand.
1992280031Sdim  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1993280031Sdim  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
1994223017Sdim  cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
1995280031Sdim
1996280031Sdim  // We need to have two outputs as that is what the original instruction had.
1997280031Sdim  // So we add a dummy, undefined output. This is safe as we checked first
1998280031Sdim  // that no one uses our output anyway.
1999280031Sdim  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
2000280031Sdim                                                 dl, NVT), 0);
2001223017Sdim  SDValue RetVals[] = { Undef, Ret };
2002276479Sdim  return CurDAG->getMergeValues(RetVals, dl).getNode();
2003223017Sdim}
2004223017Sdim
2005296417Sdim/// Test whether the given X86ISD::CMP node has any uses which require the SF
2006296417Sdim/// or OF bits to be accurate.
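///
/// For example (illustrative): if every flag consumer is an equality, parity,
/// or unsigned-condition SETcc/Jcc/CMOVcc (the list below), the CMP can be
/// replaced by a form whose SF/OF behavior differs, such as a TEST.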
2007296417Sdimstatic bool hasNoSignedComparisonUses(SDNode *N) {
2008198090Srdivacky  // Examine each user of the node.
2009198090Srdivacky  for (SDNode::use_iterator UI = N->use_begin(),
2010198090Srdivacky         UE = N->use_end(); UI != UE; ++UI) {
2011198090Srdivacky    // Only examine CopyToReg uses.
2012198090Srdivacky    if (UI->getOpcode() != ISD::CopyToReg)
2013198090Srdivacky      return false;
2014198090Srdivacky    // Only examine CopyToReg uses that copy to EFLAGS.
2015198090Srdivacky    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
2016198090Srdivacky          X86::EFLAGS)
2017198090Srdivacky      return false;
2018198090Srdivacky    // Examine each user of the CopyToReg use.
2019198090Srdivacky    for (SDNode::use_iterator FlagUI = UI->use_begin(),
2020198090Srdivacky           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2021198090Srdivacky      // Only examine the Flag result.
2022198090Srdivacky      if (FlagUI.getUse().getResNo() != 1) continue;
2023198090Srdivacky      // Anything unusual: assume conservatively.
2024198090Srdivacky      if (!FlagUI->isMachineOpcode()) return false;
2025198090Srdivacky      // Examine the opcode of the user.
2026198090Srdivacky      switch (FlagUI->getMachineOpcode()) {
2027198090Srdivacky      // These comparisons don't treat the most significant bit specially.
2028198090Srdivacky      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
2029198090Srdivacky      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
2030198090Srdivacky      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
2031198090Srdivacky      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
2032280031Sdim      case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
2033280031Sdim      case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
2034198090Srdivacky      case X86::CMOVA16rr: case X86::CMOVA16rm:
2035198090Srdivacky      case X86::CMOVA32rr: case X86::CMOVA32rm:
2036198090Srdivacky      case X86::CMOVA64rr: case X86::CMOVA64rm:
2037198090Srdivacky      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
2038198090Srdivacky      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
2039198090Srdivacky      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
2040198090Srdivacky      case X86::CMOVB16rr: case X86::CMOVB16rm:
2041198090Srdivacky      case X86::CMOVB32rr: case X86::CMOVB32rm:
2042198090Srdivacky      case X86::CMOVB64rr: case X86::CMOVB64rm:
2043198090Srdivacky      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
2044198090Srdivacky      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
2045198090Srdivacky      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
2046198090Srdivacky      case X86::CMOVE16rr: case X86::CMOVE16rm:
2047198090Srdivacky      case X86::CMOVE32rr: case X86::CMOVE32rm:
2048198090Srdivacky      case X86::CMOVE64rr: case X86::CMOVE64rm:
2049198090Srdivacky      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
2050198090Srdivacky      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
2051198090Srdivacky      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
2052198090Srdivacky      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
2053198090Srdivacky      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
2054198090Srdivacky      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
2055198090Srdivacky      case X86::CMOVP16rr: case X86::CMOVP16rm:
2056198090Srdivacky      case X86::CMOVP32rr: case X86::CMOVP32rm:
2057198090Srdivacky      case X86::CMOVP64rr: case X86::CMOVP64rm:
2058198090Srdivacky        continue;
2059198090Srdivacky      // Anything else: assume conservatively.
2060198090Srdivacky      default: return false;
2061198090Srdivacky      }
2062198090Srdivacky    }
2063198090Srdivacky  }
2064198090Srdivacky  return true;
2065198090Srdivacky}
2066198090Srdivacky
2067296417Sdim/// Check whether or not the chain ending in StoreNode is suitable for doing
2068296417Sdim/// the {load; increment or decrement; store} read-modify-write transformation.
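///
/// For example (illustrative): the chain "t = load [p]; t2 = INC t;
/// store t2, [p]", where t and t2 have no other uses, can instead be selected
/// as a single read-modify-write instruction such as "incl (%reg)".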
2069239462Sdimstatic bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
2070234353Sdim                                SDValue StoredVal, SelectionDAG *CurDAG,
2071234353Sdim                                LoadSDNode* &LoadNode, SDValue &InputChain) {
2072234353Sdim
2073234353Sdim  // Is the value stored the result of a DEC or INC?
2074234353Sdim  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
2075234353Sdim
2076234353Sdim  // Is the stored value result 0 of the load?
2077234353Sdim  if (StoredVal.getResNo() != 0) return false;
2078234353Sdim
2079234353Sdim  // Are there any uses of the loaded value other than the inc or dec?
2080234353Sdim  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2081234353Sdim
2082234353Sdim  // Is the store non-extending and non-indexed?
2083234353Sdim  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2084234353Sdim    return false;
2085234353Sdim
2086234353Sdim  SDValue Load = StoredVal->getOperand(0);
2087234353Sdim  // Is the stored value a non-extending and non-indexed load?
2088234353Sdim  if (!ISD::isNormalLoad(Load.getNode())) return false;
2089234353Sdim
2090234353Sdim  // Return LoadNode by reference.
2091234353Sdim  LoadNode = cast<LoadSDNode>(Load);
2092234353Sdim  // Is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
2093239462Sdim  EVT LdVT = LoadNode->getMemoryVT();
2094239462Sdim  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
2095234353Sdim      LdVT != MVT::i8)
2096234353Sdim    return false;
2097234353Sdim
2098234353Sdim  // Is the store the only read of the loaded value?
2099234353Sdim  if (!Load.hasOneUse())
2100234353Sdim    return false;
2101239462Sdim
2102234353Sdim  // Is the address of the store the same as the load?
2103234353Sdim  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2104234353Sdim      LoadNode->getOffset() != StoreNode->getOffset())
2105234353Sdim    return false;
2106234353Sdim
2107234353Sdim  // Check if the chain is produced by the load or is a TokenFactor with
2108234353Sdim  // the load output chain as an operand. Return InputChain by reference.
2109234353Sdim  SDValue Chain = StoreNode->getChain();
2110234353Sdim
2111234353Sdim  bool ChainCheck = false;
2112234353Sdim  if (Chain == Load.getValue(1)) {
2113234353Sdim    ChainCheck = true;
2114234353Sdim    InputChain = LoadNode->getChain();
2115234353Sdim  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2116234353Sdim    SmallVector<SDValue, 4> ChainOps;
2117234353Sdim    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2118234353Sdim      SDValue Op = Chain.getOperand(i);
2119234353Sdim      if (Op == Load.getValue(1)) {
2120234353Sdim        ChainCheck = true;
2121234353Sdim        continue;
2122234353Sdim      }
2123239462Sdim
2124239462Sdim      // Make sure using Op as part of the chain would not cause a cycle here.
2125239462Sdim      // In theory, we could check whether the chain node is a predecessor of
2126239462Sdim      // the load. But that can be very expensive. Instead, visit the uses and
2127239462Sdim      // make sure they all have a smaller node id than the load.
2128239462Sdim      int LoadId = LoadNode->getNodeId();
2129239462Sdim      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
2130239462Sdim             UE = Op.getNode()->use_end(); UI != UE; ++UI) {
2131239462Sdim        if (UI.getUse().getResNo() != 0)
2132239462Sdim          continue;
2133239462Sdim        if (UI->getNodeId() > LoadId)
2134239462Sdim          return false;
2135239462Sdim      }
2136239462Sdim
2137234353Sdim      ChainOps.push_back(Op);
2138234353Sdim    }
2139234353Sdim
2140234353Sdim    if (ChainCheck)
2141234353Sdim      // Make a new TokenFactor with all the other input chains except
2142234353Sdim      // for the load.
2143261991Sdim      InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
2144276479Sdim                                   MVT::Other, ChainOps);
2145234353Sdim  }
2146234353Sdim  if (!ChainCheck)
2147234353Sdim    return false;
2148234353Sdim
2149234353Sdim  return true;
2150234353Sdim}
2151234353Sdim
2152296417Sdim/// Get the appropriate X86 opcode for an in-memory increment or decrement.
2153296417Sdim/// Opc should be X86ISD::DEC or X86ISD::INC.
2154234353Sdimstatic unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
2155234353Sdim  if (Opc == X86ISD::DEC) {
2156234353Sdim    if (LdVT == MVT::i64) return X86::DEC64m;
2157234353Sdim    if (LdVT == MVT::i32) return X86::DEC32m;
2158234353Sdim    if (LdVT == MVT::i16) return X86::DEC16m;
2159234353Sdim    if (LdVT == MVT::i8)  return X86::DEC8m;
2160234353Sdim  } else {
2161234353Sdim    assert(Opc == X86ISD::INC && "unrecognized opcode");
2162234353Sdim    if (LdVT == MVT::i64) return X86::INC64m;
2163234353Sdim    if (LdVT == MVT::i32) return X86::INC32m;
2164234353Sdim    if (LdVT == MVT::i16) return X86::INC16m;
2165234353Sdim    if (LdVT == MVT::i8)  return X86::INC8m;
2166234353Sdim  }
2167234353Sdim  llvm_unreachable("unrecognized size for LdVT");
2168234353Sdim}
2169234353Sdim
2170296417Sdim/// Customized ISel for GATHER operations.
2171296417SdimSDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) {
2172239462Sdim  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
2173239462Sdim  SDValue Chain = Node->getOperand(0);
2174239462Sdim  SDValue VSrc = Node->getOperand(2);
2175239462Sdim  SDValue Base = Node->getOperand(3);
2176239462Sdim  SDValue VIdx = Node->getOperand(4);
2177239462Sdim  SDValue VMask = Node->getOperand(5);
2178239462Sdim  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
2179239462Sdim  if (!Scale)
2180276479Sdim    return nullptr;
2181239462Sdim
2182239462Sdim  SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
2183239462Sdim                                   MVT::Other);
2184239462Sdim
2185288943Sdim  SDLoc DL(Node);
2186288943Sdim
2187239462Sdim  // Memory Operands: Base, Scale, Index, Disp, Segment
2188288943Sdim  SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
2189239462Sdim  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
2190288943Sdim  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
2191239462Sdim                          Disp, Segment, VMask, Chain};
2192288943Sdim  SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2193239462Sdim  // Node has 2 outputs: VDst and MVT::Other.
2194239462Sdim  // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
2195239462Sdim  // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
2196239462Sdim  // of ResNode.
2197239462Sdim  ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
2198239462Sdim  ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
2199239462Sdim  return ResNode;
2200239462Sdim}
2201239462Sdim
2202202375SrdivackySDNode *X86DAGToDAGISel::Select(SDNode *Node) {
2203261991Sdim  MVT NVT = Node->getSimpleValueType(0);
2204193323Sed  unsigned Opc, MOpc;
2205193323Sed  unsigned Opcode = Node->getOpcode();
2206261991Sdim  SDLoc dl(Node);
2207239462Sdim
2208204642Srdivacky  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
2209193323Sed
2210193323Sed  if (Node->isMachineOpcode()) {
2211204642Srdivacky    DEBUG(dbgs() << "== ";  Node->dump(CurDAG); dbgs() << '\n');
2212255804Sdim    Node->setNodeId(-1);
2213276479Sdim    return nullptr;   // Already selected.
2214193323Sed  }
2215193323Sed
2216193323Sed  switch (Opcode) {
2217198090Srdivacky  default: break;
2218296417Sdim  case ISD::BRIND: {
2219296417Sdim    if (Subtarget->isTargetNaCl())
2220296417Sdim      // NaCl has its own pass where jmp %r32 instructions are converted to
2221296417Sdim      // jmp %r64, so we leave the instruction alone.
2222296417Sdim      break;
2223296417Sdim    if (Subtarget->isTarget64BitILP32()) {
2224296417Sdim      // Converts a 32-bit register to a 64-bit, zero-extended version of
2225296417Sdim      // it. This is needed because x86-64 can do many things, but jmp %r32
2226296417Sdim      // ain't one of them.
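      // For example (illustrative): a BRIND of a 32-bit pointer value is
      // rewritten below as a zero-extension to i64 followed by a BRIND of the
      // 64-bit value, which can then select to a 64-bit indirect jmp.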
2227296417Sdim      const SDValue &Target = Node->getOperand(1);
2228296417Sdim      assert(Target.getSimpleValueType() == llvm::MVT::i32);
2229296417Sdim      SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2230296417Sdim      SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2231296417Sdim                                      Node->getOperand(0), ZextTarget);
2232296417Sdim      ReplaceUses(SDValue(Node, 0), Brind);
2233296417Sdim      SelectCode(ZextTarget.getNode());
2234296417Sdim      SelectCode(Brind.getNode());
2235296417Sdim      return nullptr;
2236296417Sdim    }
2237296417Sdim    break;
2238296417Sdim  }
2239239462Sdim  case ISD::INTRINSIC_W_CHAIN: {
2240239462Sdim    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2241239462Sdim    switch (IntNo) {
2242239462Sdim    default: break;
2243239462Sdim    case Intrinsic::x86_avx2_gather_d_pd:
2244239462Sdim    case Intrinsic::x86_avx2_gather_d_pd_256:
2245239462Sdim    case Intrinsic::x86_avx2_gather_q_pd:
2246239462Sdim    case Intrinsic::x86_avx2_gather_q_pd_256:
2247239462Sdim    case Intrinsic::x86_avx2_gather_d_ps:
2248239462Sdim    case Intrinsic::x86_avx2_gather_d_ps_256:
2249239462Sdim    case Intrinsic::x86_avx2_gather_q_ps:
2250239462Sdim    case Intrinsic::x86_avx2_gather_q_ps_256:
2251239462Sdim    case Intrinsic::x86_avx2_gather_d_q:
2252239462Sdim    case Intrinsic::x86_avx2_gather_d_q_256:
2253239462Sdim    case Intrinsic::x86_avx2_gather_q_q:
2254239462Sdim    case Intrinsic::x86_avx2_gather_q_q_256:
2255239462Sdim    case Intrinsic::x86_avx2_gather_d_d:
2256239462Sdim    case Intrinsic::x86_avx2_gather_d_d_256:
2257239462Sdim    case Intrinsic::x86_avx2_gather_q_d:
2258239462Sdim    case Intrinsic::x86_avx2_gather_q_d_256: {
2259261991Sdim      if (!Subtarget->hasAVX2())
2260261991Sdim        break;
2261239462Sdim      unsigned Opc;
2262239462Sdim      switch (IntNo) {
2263239462Sdim      default: llvm_unreachable("Impossible intrinsic");
2264239462Sdim      case Intrinsic::x86_avx2_gather_d_pd:     Opc = X86::VGATHERDPDrm;  break;
2265239462Sdim      case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
2266239462Sdim      case Intrinsic::x86_avx2_gather_q_pd:     Opc = X86::VGATHERQPDrm;  break;
2267239462Sdim      case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
2268239462Sdim      case Intrinsic::x86_avx2_gather_d_ps:     Opc = X86::VGATHERDPSrm;  break;
2269239462Sdim      case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
2270239462Sdim      case Intrinsic::x86_avx2_gather_q_ps:     Opc = X86::VGATHERQPSrm;  break;
2271239462Sdim      case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
2272239462Sdim      case Intrinsic::x86_avx2_gather_d_q:      Opc = X86::VPGATHERDQrm;  break;
2273239462Sdim      case Intrinsic::x86_avx2_gather_d_q_256:  Opc = X86::VPGATHERDQYrm; break;
2274239462Sdim      case Intrinsic::x86_avx2_gather_q_q:      Opc = X86::VPGATHERQQrm;  break;
2275239462Sdim      case Intrinsic::x86_avx2_gather_q_q_256:  Opc = X86::VPGATHERQQYrm; break;
2276239462Sdim      case Intrinsic::x86_avx2_gather_d_d:      Opc = X86::VPGATHERDDrm;  break;
2277239462Sdim      case Intrinsic::x86_avx2_gather_d_d_256:  Opc = X86::VPGATHERDDYrm; break;
2278239462Sdim      case Intrinsic::x86_avx2_gather_q_d:      Opc = X86::VPGATHERQDrm;  break;
2279239462Sdim      case Intrinsic::x86_avx2_gather_q_d_256:  Opc = X86::VPGATHERQDYrm; break;
2280239462Sdim      }
2281296417Sdim      SDNode *RetVal = selectGather(Node, Opc);
2282239462Sdim      if (RetVal)
2283239462Sdim        // We already called ReplaceUses inside selectGather.
2284276479Sdim        return nullptr;
2285239462Sdim      break;
2286239462Sdim    }
2287239462Sdim    }
2288239462Sdim    break;
2289239462Sdim  }
2290198090Srdivacky  case X86ISD::GlobalBaseReg:
2291198090Srdivacky    return getGlobalBaseReg();
2292193323Sed
2293280031Sdim  case X86ISD::SHRUNKBLEND: {
2294280031Sdim    // SHRUNKBLEND selects like a regular VSELECT.
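    // Rebuild it as an ISD::VSELECT with the same operands so the existing
    // VSELECT patterns can match it; the original node is replaced below.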
2295280031Sdim    SDValue VSelect = CurDAG->getNode(
2296280031Sdim        ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2297280031Sdim        Node->getOperand(1), Node->getOperand(2));
2298280031Sdim    ReplaceUses(SDValue(Node, 0), VSelect);
2299280031Sdim    SelectCode(VSelect.getNode());
2300280031Sdim    // We already called ReplaceUses.
2301280031Sdim    return nullptr;
2302280031Sdim  }
2303239462Sdim
2304223017Sdim  case ISD::ATOMIC_LOAD_XOR:
2305223017Sdim  case ISD::ATOMIC_LOAD_AND:
2306243830Sdim  case ISD::ATOMIC_LOAD_OR:
2307243830Sdim  case ISD::ATOMIC_LOAD_ADD: {
2308296417Sdim    SDNode *RetVal = selectAtomicLoadArith(Node, NVT);
2309223017Sdim    if (RetVal)
2310223017Sdim      return RetVal;
2311223017Sdim    break;
2312223017Sdim  }
2313221345Sdim  case ISD::AND:
2314221345Sdim  case ISD::OR:
2315221345Sdim  case ISD::XOR: {
2316221345Sdim    // For operations of the form (x << C1) op C2, check if we can use a smaller
2317221345Sdim    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
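    // For example, with i32 operands, (x << 8) | 0x100 becomes (x | 0x1) << 8,
    // so the OR can be encoded with an 8-bit immediate instead of a 32-bit one.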
2318221345Sdim    SDValue N0 = Node->getOperand(0);
2319221345Sdim    SDValue N1 = Node->getOperand(1);
2320221345Sdim
2321221345Sdim    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2322221345Sdim      break;
2323221345Sdim
2324221345Sdim    // i8 is unshrinkable; i16 should be promoted to i32.
2325221345Sdim    if (NVT != MVT::i32 && NVT != MVT::i64)
2326221345Sdim      break;
2327221345Sdim
2328221345Sdim    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2329221345Sdim    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2330221345Sdim    if (!Cst || !ShlCst)
2331221345Sdim      break;
2332221345Sdim
2333221345Sdim    int64_t Val = Cst->getSExtValue();
2334221345Sdim    uint64_t ShlVal = ShlCst->getZExtValue();
2335221345Sdim
2336221345Sdim    // Make sure that we don't change the operation by removing bits.
2337221345Sdim    // This only matters for OR and XOR; AND is unaffected.
2338243830Sdim    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2339243830Sdim    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2340221345Sdim      break;
2341221345Sdim
2342288943Sdim    unsigned ShlOp, AddOp, Op;
2343261991Sdim    MVT CstVT = NVT;
2344221345Sdim
2345221345Sdim    // Check the minimum bitwidth for the new constant.
2346221345Sdim    // TODO: AND32ri is the same as AND64ri32 with zext imm.
2347221345Sdim    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2348221345Sdim    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2349221345Sdim    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2350221345Sdim      CstVT = MVT::i8;
2351221345Sdim    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2352221345Sdim      CstVT = MVT::i32;
2353221345Sdim
2354221345Sdim    // Bail if there is no smaller encoding.
2355221345Sdim    if (NVT == CstVT)
2356221345Sdim      break;
2357221345Sdim
2358261991Sdim    switch (NVT.SimpleTy) {
2359221345Sdim    default: llvm_unreachable("Unsupported VT!");
2360221345Sdim    case MVT::i32:
2361221345Sdim      assert(CstVT == MVT::i8);
2362221345Sdim      ShlOp = X86::SHL32ri;
2363288943Sdim      AddOp = X86::ADD32rr;
2364221345Sdim
2365221345Sdim      switch (Opcode) {
2366239462Sdim      default: llvm_unreachable("Impossible opcode");
2367221345Sdim      case ISD::AND: Op = X86::AND32ri8; break;
2368221345Sdim      case ISD::OR:  Op =  X86::OR32ri8; break;
2369221345Sdim      case ISD::XOR: Op = X86::XOR32ri8; break;
2370221345Sdim      }
2371221345Sdim      break;
2372221345Sdim    case MVT::i64:
2373221345Sdim      assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2374221345Sdim      ShlOp = X86::SHL64ri;
2375288943Sdim      AddOp = X86::ADD64rr;
2376221345Sdim
2377221345Sdim      switch (Opcode) {
2378239462Sdim      default: llvm_unreachable("Impossible opcode");
2379221345Sdim      case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2380221345Sdim      case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
2381221345Sdim      case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2382221345Sdim      }
2383221345Sdim      break;
2384221345Sdim    }
2385221345Sdim
2386221345Sdim    // Emit the smaller op and the shift.
2387288943Sdim    SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2388221345Sdim    SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2389288943Sdim    if (ShlVal == 1)
2390288943Sdim      return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2391288943Sdim                                  SDValue(New, 0));
2392221345Sdim    return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2393288943Sdim                                getI8Imm(ShlVal, dl));
2394221345Sdim  }
2395280031Sdim  case X86ISD::UMUL8:
2396280031Sdim  case X86ISD::SMUL8: {
2397280031Sdim    SDValue N0 = Node->getOperand(0);
2398280031Sdim    SDValue N1 = Node->getOperand(1);
2399280031Sdim
2400280031Sdim    Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2401280031Sdim
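    // MUL8r / IMUL8r multiply AL by the operand and leave the product in AX,
    // so copy the LHS into AL and glue that copy to the multiply.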
2402280031Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2403280031Sdim                                          N0, SDValue()).getValue(1);
2404280031Sdim
2405280031Sdim    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2406280031Sdim    SDValue Ops[] = {N1, InFlag};
2407280031Sdim    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2408280031Sdim
2409280031Sdim    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
2410280031Sdim    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
2411280031Sdim    return nullptr;
2412280031Sdim  }
2413280031Sdim
2414218893Sdim  case X86ISD::UMUL: {
2415218893Sdim    SDValue N0 = Node->getOperand(0);
2416218893Sdim    SDValue N1 = Node->getOperand(1);
2417239462Sdim
2418218893Sdim    unsigned LoReg;
2419261991Sdim    switch (NVT.SimpleTy) {
2420218893Sdim    default: llvm_unreachable("Unsupported VT!");
2421218893Sdim    case MVT::i8:  LoReg = X86::AL;  Opc = X86::MUL8r; break;
2422218893Sdim    case MVT::i16: LoReg = X86::AX;  Opc = X86::MUL16r; break;
2423218893Sdim    case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2424218893Sdim    case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2425218893Sdim    }
2426239462Sdim
2427218893Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2428218893Sdim                                          N0, SDValue()).getValue(1);
2429239462Sdim
2430218893Sdim    SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2431218893Sdim    SDValue Ops[] = {N1, InFlag};
2432251662Sdim    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2433239462Sdim
2434218893Sdim    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
2435218893Sdim    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
2436218893Sdim    ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
2437276479Sdim    return nullptr;
2438218893Sdim  }
2439239462Sdim
2440198090Srdivacky  case ISD::SMUL_LOHI:
2441198090Srdivacky  case ISD::UMUL_LOHI: {
2442198090Srdivacky    SDValue N0 = Node->getOperand(0);
2443198090Srdivacky    SDValue N1 = Node->getOperand(1);
2444193323Sed
2445198090Srdivacky    bool isSigned = Opcode == ISD::SMUL_LOHI;
2446243830Sdim    bool hasBMI2 = Subtarget->hasBMI2();
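    // With BMI2, MULX takes one source implicitly in EDX/RDX and writes both
    // halves of the product to explicit registers without touching EFLAGS.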
2447198090Srdivacky    if (!isSigned) {
2448261991Sdim      switch (NVT.SimpleTy) {
2449198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2450198090Srdivacky      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
2451198090Srdivacky      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2452243830Sdim      case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2453243830Sdim                     MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2454243830Sdim      case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2455243830Sdim                     MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2456193323Sed      }
2457198090Srdivacky    } else {
2458261991Sdim      switch (NVT.SimpleTy) {
2459198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2460198090Srdivacky      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
2461198090Srdivacky      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2462198090Srdivacky      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2463198090Srdivacky      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2464193323Sed      }
2465198090Srdivacky    }
2466193323Sed
2467243830Sdim    unsigned SrcReg, LoReg, HiReg;
2468243830Sdim    switch (Opc) {
2469243830Sdim    default: llvm_unreachable("Unknown MUL opcode!");
2470243830Sdim    case X86::IMUL8r:
2471243830Sdim    case X86::MUL8r:
2472243830Sdim      SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2473243830Sdim      break;
2474243830Sdim    case X86::IMUL16r:
2475243830Sdim    case X86::MUL16r:
2476243830Sdim      SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2477243830Sdim      break;
2478243830Sdim    case X86::IMUL32r:
2479243830Sdim    case X86::MUL32r:
2480243830Sdim      SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2481243830Sdim      break;
2482243830Sdim    case X86::IMUL64r:
2483243830Sdim    case X86::MUL64r:
2484243830Sdim      SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2485243830Sdim      break;
2486243830Sdim    case X86::MULX32rr:
2487243830Sdim      SrcReg = X86::EDX; LoReg = HiReg = 0;
2488243830Sdim      break;
2489243830Sdim    case X86::MULX64rr:
2490243830Sdim      SrcReg = X86::RDX; LoReg = HiReg = 0;
2491243830Sdim      break;
2492198090Srdivacky    }
2493193323Sed
2494198090Srdivacky    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2495296417Sdim    bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2496198090Srdivacky    // Multiply is commutative.
2497198090Srdivacky    if (!foldedLoad) {
2498296417Sdim      foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2499198090Srdivacky      if (foldedLoad)
2500198090Srdivacky        std::swap(N0, N1);
2501198090Srdivacky    }
2502193323Sed
2503243830Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2504239462Sdim                                          N0, SDValue()).getValue(1);
2505243830Sdim    SDValue ResHi, ResLo;
2506198090Srdivacky
2507198090Srdivacky    if (foldedLoad) {
2508243830Sdim      SDValue Chain;
2509198090Srdivacky      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2510198090Srdivacky                        InFlag };
2511243830Sdim      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2512243830Sdim        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2513251662Sdim        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2514243830Sdim        ResHi = SDValue(CNode, 0);
2515243830Sdim        ResLo = SDValue(CNode, 1);
2516243830Sdim        Chain = SDValue(CNode, 2);
2517243830Sdim        InFlag = SDValue(CNode, 3);
2518243830Sdim      } else {
2519243830Sdim        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2520251662Sdim        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2521243830Sdim        Chain = SDValue(CNode, 0);
2522243830Sdim        InFlag = SDValue(CNode, 1);
2523243830Sdim      }
2524218893Sdim
2525198090Srdivacky      // Update the chain.
2526243830Sdim      ReplaceUses(N1.getValue(1), Chain);
2527198090Srdivacky    } else {
2528243830Sdim      SDValue Ops[] = { N1, InFlag };
2529243830Sdim      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2530243830Sdim        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2531251662Sdim        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2532243830Sdim        ResHi = SDValue(CNode, 0);
2533243830Sdim        ResLo = SDValue(CNode, 1);
2534243830Sdim        InFlag = SDValue(CNode, 2);
2535243830Sdim      } else {
2536243830Sdim        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2537251662Sdim        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2538243830Sdim        InFlag = SDValue(CNode, 0);
2539243830Sdim      }
2540198090Srdivacky    }
2541198090Srdivacky
2542210299Sed    // Prevent use of AH in a REX instruction by referencing AX instead.
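    // An instruction that encodes AH cannot also carry a REX prefix, so copy
    // all of AX out and shift right by 8 to recover the high byte.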
2543210299Sed    if (HiReg == X86::AH && Subtarget->is64Bit() &&
2544210299Sed        !SDValue(Node, 1).use_empty()) {
2545210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2546210299Sed                                              X86::AX, MVT::i16, InFlag);
2547210299Sed      InFlag = Result.getValue(2);
2548210299Sed      // Get the low part if needed. Don't use getCopyFromReg for aliasing
2549210299Sed      // registers.
2550210299Sed      if (!SDValue(Node, 0).use_empty())
2551210299Sed        ReplaceUses(SDValue(Node, 0),
2552210299Sed          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2553210299Sed
2554210299Sed      // Shift AX down 8 bits.
2555210299Sed      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2556210299Sed                                              Result,
2557288943Sdim                                     CurDAG->getTargetConstant(8, dl, MVT::i8)),
2558288943Sdim                       0);
2559210299Sed      // Then truncate it down to i8.
2560210299Sed      ReplaceUses(SDValue(Node, 1),
2561210299Sed        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2562210299Sed    }
2563198090Srdivacky    // Copy the low half of the result, if it is needed.
2564202375Srdivacky    if (!SDValue(Node, 0).use_empty()) {
2565276479Sdim      if (!ResLo.getNode()) {
2566243830Sdim        assert(LoReg && "Register for low half is not defined!");
2567243830Sdim        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2568243830Sdim                                       InFlag);
2569243830Sdim        InFlag = ResLo.getValue(2);
2570243830Sdim      }
2571243830Sdim      ReplaceUses(SDValue(Node, 0), ResLo);
2572243830Sdim      DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2573198090Srdivacky    }
2574198090Srdivacky    // Copy the high half of the result, if it is needed.
2575202375Srdivacky    if (!SDValue(Node, 1).use_empty()) {
2576276479Sdim      if (!ResHi.getNode()) {
2577243830Sdim        assert(HiReg && "Register for high half is not defined!");
2578243830Sdim        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2579243830Sdim                                       InFlag);
2580243830Sdim        InFlag = ResHi.getValue(2);
2581243830Sdim      }
2582243830Sdim      ReplaceUses(SDValue(Node, 1), ResHi);
2583243830Sdim      DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2584198090Srdivacky    }
2585239462Sdim
2586276479Sdim    return nullptr;
2587198090Srdivacky  }
2588193323Sed
2589198090Srdivacky  case ISD::SDIVREM:
2590280031Sdim  case ISD::UDIVREM:
2591280031Sdim  case X86ISD::SDIVREM8_SEXT_HREG:
2592280031Sdim  case X86ISD::UDIVREM8_ZEXT_HREG: {
2593198090Srdivacky    SDValue N0 = Node->getOperand(0);
2594198090Srdivacky    SDValue N1 = Node->getOperand(1);
2595193323Sed
2596280031Sdim    bool isSigned = (Opcode == ISD::SDIVREM ||
2597280031Sdim                     Opcode == X86ISD::SDIVREM8_SEXT_HREG);
2598198090Srdivacky    if (!isSigned) {
2599261991Sdim      switch (NVT.SimpleTy) {
2600198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2601198090Srdivacky      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
2602198090Srdivacky      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2603198090Srdivacky      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2604198090Srdivacky      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2605193323Sed      }
2606198090Srdivacky    } else {
2607261991Sdim      switch (NVT.SimpleTy) {
2608198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2609198090Srdivacky      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
2610198090Srdivacky      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2611198090Srdivacky      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2612198090Srdivacky      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2613198090Srdivacky      }
2614198090Srdivacky    }
2615193323Sed
2616201360Srdivacky    unsigned LoReg, HiReg, ClrReg;
2617261991Sdim    unsigned SExtOpcode;
2618261991Sdim    switch (NVT.SimpleTy) {
2619198090Srdivacky    default: llvm_unreachable("Unsupported VT!");
2620198090Srdivacky    case MVT::i8:
2621201360Srdivacky      LoReg = X86::AL;  ClrReg = HiReg = X86::AH;
2622198090Srdivacky      SExtOpcode = X86::CBW;
2623198090Srdivacky      break;
2624198090Srdivacky    case MVT::i16:
2625198090Srdivacky      LoReg = X86::AX;  HiReg = X86::DX;
2626261991Sdim      ClrReg = X86::DX;
2627198090Srdivacky      SExtOpcode = X86::CWD;
2628198090Srdivacky      break;
2629198090Srdivacky    case MVT::i32:
2630201360Srdivacky      LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2631198090Srdivacky      SExtOpcode = X86::CDQ;
2632198090Srdivacky      break;
2633198090Srdivacky    case MVT::i64:
2634201360Srdivacky      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2635198090Srdivacky      SExtOpcode = X86::CQO;
2636198090Srdivacky      break;
2637198090Srdivacky    }
2638193323Sed
2639198090Srdivacky    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2640296417Sdim    bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2641198090Srdivacky    bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2642198090Srdivacky
2643198090Srdivacky    SDValue InFlag;
2644198090Srdivacky    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2645198090Srdivacky      // Special case for div8, just use a move with zero extension to AX to
2646198090Srdivacky      // clear the upper 8 bits (AH).
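      // E.g. an unsigned i8 divide becomes "movzbl %src, %eax" + "divb %rhs";
      // the quotient ends up in AL and the remainder in AH.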
2647198090Srdivacky      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2648296417Sdim      if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2649198090Srdivacky        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2650198090Srdivacky        Move =
2651223017Sdim          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2652251662Sdim                                         MVT::Other, Ops), 0);
2653198090Srdivacky        Chain = Move.getValue(1);
2654198090Srdivacky        ReplaceUses(N0.getValue(1), Chain);
2655193323Sed      } else {
2656198090Srdivacky        Move =
2657223017Sdim          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2658198090Srdivacky        Chain = CurDAG->getEntryNode();
2659198090Srdivacky      }
2660223017Sdim      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2661198090Srdivacky      InFlag = Chain.getValue(1);
2662198090Srdivacky    } else {
2663198090Srdivacky      InFlag =
2664198090Srdivacky        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2665198090Srdivacky                             LoReg, N0, SDValue()).getValue(1);
2666198090Srdivacky      if (isSigned && !signBitIsZero) {
2667198090Srdivacky        // Sign extend the low part into the high part.
2668193323Sed        InFlag =
2669218893Sdim          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2670198090Srdivacky      } else {
2671198090Srdivacky        // Zero out the high part, effectively zero extending the input.
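        // MOV32r0 materializes a 32-bit zero; the switch below narrows it to
        // i16 with EXTRACT_SUBREG or widens it to i64 with SUBREG_TO_REG.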
2672280031Sdim        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2673261991Sdim        switch (NVT.SimpleTy) {
2674261991Sdim        case MVT::i16:
2675261991Sdim          ClrNode =
2676261991Sdim              SDValue(CurDAG->getMachineNode(
2677261991Sdim                          TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2678288943Sdim                          CurDAG->getTargetConstant(X86::sub_16bit, dl,
2679288943Sdim                                                    MVT::i32)),
2680261991Sdim                      0);
2681261991Sdim          break;
2682261991Sdim        case MVT::i32:
2683261991Sdim          break;
2684261991Sdim        case MVT::i64:
2685261991Sdim          ClrNode =
2686261991Sdim              SDValue(CurDAG->getMachineNode(
2687261991Sdim                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2688288943Sdim                          CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
2689288943Sdim                          CurDAG->getTargetConstant(X86::sub_32bit, dl,
2690288943Sdim                                                    MVT::i32)),
2691261991Sdim                      0);
2692261991Sdim          break;
2693261991Sdim        default:
2694261991Sdim          llvm_unreachable("Unexpected division source");
2695261991Sdim        }
2696261991Sdim
2697201360Srdivacky        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2698198090Srdivacky                                      ClrNode, InFlag).getValue(1);
2699193323Sed      }
2700198090Srdivacky    }
2701193323Sed
2702198090Srdivacky    if (foldedLoad) {
2703198090Srdivacky      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2704198090Srdivacky                        InFlag };
2705198090Srdivacky      SDNode *CNode =
2706251662Sdim        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2707198090Srdivacky      InFlag = SDValue(CNode, 1);
2708198090Srdivacky      // Update the chain.
2709198090Srdivacky      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2710198090Srdivacky    } else {
2711198090Srdivacky      InFlag =
2712218893Sdim        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2713198090Srdivacky    }
2714198090Srdivacky
2715280031Sdim    // Prevent use of AH in a REX instruction by explicitly copying it to
2716280031Sdim    // an ABCD_L register.
2717261991Sdim    //
2718261991Sdim    // The current assumption of the register allocator is that isel
2719280031Sdim    // won't generate explicit references to the GR8_ABCD_H registers. If
2720261991Sdim    // the allocator and/or the backend get enhanced to be more robust in
2721261991Sdim    // that regard, this can be, and should be, removed.
2722280031Sdim    if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
2723280031Sdim      SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
2724280031Sdim      unsigned AHExtOpcode =
2725280031Sdim          isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
2726210299Sed
2727280031Sdim      SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
2728280031Sdim                                             MVT::Glue, AHCopy, InFlag);
2729280031Sdim      SDValue Result(RNode, 0);
2730280031Sdim      InFlag = SDValue(RNode, 1);
2731210299Sed
2732280031Sdim      if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
2733280031Sdim          Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
2734280031Sdim        if (Node->getValueType(1) == MVT::i64) {
2735280031Sdim          // It's not possible to directly movsx AH to a 64bit register, because
2736280031Sdim          // the latter needs the REX prefix, but the former can't have it.
2737280031Sdim          assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
2738280031Sdim                 "Unexpected i64 sext of h-register");
2739280031Sdim          Result =
2740280031Sdim              SDValue(CurDAG->getMachineNode(
2741280031Sdim                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2742288943Sdim                          CurDAG->getTargetConstant(0, dl, MVT::i64), Result,
2743288943Sdim                          CurDAG->getTargetConstant(X86::sub_32bit, dl,
2744288943Sdim                                                    MVT::i32)),
2745280031Sdim                      0);
2746280031Sdim        }
2747280031Sdim      } else {
2748280031Sdim        Result =
2749280031Sdim            CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
2750280031Sdim      }
2751280031Sdim      ReplaceUses(SDValue(Node, 1), Result);
2752280031Sdim      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2753210299Sed    }
2754198090Srdivacky    // Copy the division (low) result, if it is needed.
2755202375Srdivacky    if (!SDValue(Node, 0).use_empty()) {
2756198090Srdivacky      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2757198090Srdivacky                                                LoReg, NVT, InFlag);
2758198090Srdivacky      InFlag = Result.getValue(2);
2759202375Srdivacky      ReplaceUses(SDValue(Node, 0), Result);
2760204642Srdivacky      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2761198090Srdivacky    }
2762198090Srdivacky    // Copy the remainder (high) result, if it is needed.
2763202375Srdivacky    if (!SDValue(Node, 1).use_empty()) {
2764210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2765210299Sed                                              HiReg, NVT, InFlag);
2766210299Sed      InFlag = Result.getValue(2);
2767202375Srdivacky      ReplaceUses(SDValue(Node, 1), Result);
2768204642Srdivacky      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2769198090Srdivacky    }
2770276479Sdim    return nullptr;
2771198090Srdivacky  }
2772193323Sed
2773239462Sdim  case X86ISD::CMP:
2774239462Sdim  case X86ISD::SUB: {
2775239462Sdim    // Sometimes a SUB is used to perform comparison.
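    // In that case only the EFLAGS result (value 1) is live; if the arithmetic
    // result (value 0) has uses, this really is a subtract and must be kept.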
2776239462Sdim    if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2777239462Sdim      // This node is not a CMP.
2778239462Sdim      break;
2779198090Srdivacky    SDValue N0 = Node->getOperand(0);
2780198090Srdivacky    SDValue N1 = Node->getOperand(1);
2781198090Srdivacky
2782280031Sdim    // Look past the truncate if CMP is its only use.
2783296417Sdim    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2784288943Sdim        hasNoSignedComparisonUses(Node))
2785280031Sdim      N0 = N0.getOperand(0);
2786198090Srdivacky
2787198090Srdivacky    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2788280031Sdim    // use a smaller encoding.
2789234353Sdim    if ((N0.getNode()->getOpcode() == ISD::AND ||
2790234353Sdim         (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
2791234353Sdim        N0.getNode()->hasOneUse() &&
2792198090Srdivacky        N0.getValueType() != MVT::i8 &&
2793198090Srdivacky        X86::isZeroNode(N1)) {
2794198090Srdivacky      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
2795198090Srdivacky      if (!C) break;
2796198090Srdivacky
2797198090Srdivacky      // For example, convert "testl %eax, $8" to "testb %al, $8"
2798198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
2799198090Srdivacky          (!(C->getZExtValue() & 0x80) ||
2800296417Sdim           hasNoSignedComparisonUses(Node))) {
2801288943Sdim        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
2802198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2803198090Srdivacky
2804198090Srdivacky        // On x86-32, only the ABCD registers have 8-bit subregisters.
2805198090Srdivacky        if (!Subtarget->is64Bit()) {
2806234353Sdim          const TargetRegisterClass *TRC;
2807261991Sdim          switch (N0.getSimpleValueType().SimpleTy) {
2808198090Srdivacky          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2809198090Srdivacky          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2810198090Srdivacky          default: llvm_unreachable("Unsupported TEST operand type!");
2811198090Srdivacky          }
2812288943Sdim          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
2813198090Srdivacky          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2814198090Srdivacky                                               Reg.getValueType(), Reg, RC), 0);
2815198090Srdivacky        }
2816198090Srdivacky
2817198090Srdivacky        // Extract the l-register.
2818208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2819198090Srdivacky                                                        MVT::i8, Reg);
2820198090Srdivacky
2821198090Srdivacky        // Emit a testb.
2822243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2823243830Sdim                                                 Subreg, Imm);
2824243830Sdim        // Replace SUB|CMP with TEST. Since SUB has two outputs while TEST has
2825243830Sdim        // only one, do not call ReplaceAllUsesWith.
2826243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2827243830Sdim                    SDValue(NewNode, 0));
2828276479Sdim        return nullptr;
2829193323Sed      }
2830198090Srdivacky
2831198090Srdivacky      // For example, "testl %eax, $2048" to "testb %ah, $8".
2832198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
2833198090Srdivacky          (!(C->getZExtValue() & 0x8000) ||
2834296417Sdim           hasNoSignedComparisonUses(Node))) {
2835198090Srdivacky        // Shift the immediate right by 8 bits.
2836198090Srdivacky        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
2837288943Sdim                                                       dl, MVT::i8);
2838198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2839198090Srdivacky
2840198090Srdivacky        // Put the value in an ABCD register.
2841234353Sdim        const TargetRegisterClass *TRC;
2842261991Sdim        switch (N0.getSimpleValueType().SimpleTy) {
2843198090Srdivacky        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
2844198090Srdivacky        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2845198090Srdivacky        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2846198090Srdivacky        default: llvm_unreachable("Unsupported TEST operand type!");
2847198090Srdivacky        }
2848288943Sdim        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
2849198090Srdivacky        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2850198090Srdivacky                                             Reg.getValueType(), Reg, RC), 0);
2851198090Srdivacky
2852198090Srdivacky        // Extract the h-register.
2853208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
2854198090Srdivacky                                                        MVT::i8, Reg);
2855198090Srdivacky
2856226633Sdim        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
2857226633Sdim        // target GR8_NOREX registers, so make sure the register class is
2858226633Sdim        // forced.
2859243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
2860243830Sdim                                                 MVT::i32, Subreg, ShiftedImm);
2861243830Sdim        // Replace SUB|CMP with TEST. Since SUB has two outputs while TEST has
2862243830Sdim        // only one, do not call ReplaceAllUsesWith.
2863243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2864243830Sdim                    SDValue(NewNode, 0));
2865276479Sdim        return nullptr;
2866193323Sed      }
2867198090Srdivacky
2868198090Srdivacky      // For example, "testl %eax, $32776" to "testw %ax, $32776".
2869198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
2870198090Srdivacky          N0.getValueType() != MVT::i16 &&
2871198090Srdivacky          (!(C->getZExtValue() & 0x8000) ||
2872296417Sdim           hasNoSignedComparisonUses(Node))) {
2873288943Sdim        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
2874288943Sdim                                                MVT::i16);
2875198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2876198090Srdivacky
2877198090Srdivacky        // Extract the 16-bit subregister.
2878208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
2879198090Srdivacky                                                        MVT::i16, Reg);
2880198090Srdivacky
2881198090Srdivacky        // Emit a testw.
2882243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
2883243830Sdim                                                 Subreg, Imm);
2884243830Sdim        // Replace SUB|CMP with TEST. Since SUB has two outputs while TEST has
2885243830Sdim        // only one, do not call ReplaceAllUsesWith.
2886243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2887243830Sdim                    SDValue(NewNode, 0));
2888276479Sdim        return nullptr;
2889193323Sed      }
2890198090Srdivacky
2891198090Srdivacky      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
2892198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
2893198090Srdivacky          N0.getValueType() == MVT::i64 &&
2894198090Srdivacky          (!(C->getZExtValue() & 0x80000000) ||
2895296417Sdim           hasNoSignedComparisonUses(Node))) {
2896288943Sdim        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
2897288943Sdim                                                MVT::i32);
2898198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2899198090Srdivacky
2900198090Srdivacky        // Extract the 32-bit subregister.
2901208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
2902198090Srdivacky                                                        MVT::i32, Reg);
2903198090Srdivacky
2904198090Srdivacky        // Emit a testl.
2905243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
2906243830Sdim                                                 Subreg, Imm);
2907243830Sdim        // Replace SUB|CMP with TEST. Since SUB has two outputs while TEST has
2908243830Sdim        // only one, do not call ReplaceAllUsesWith.
2909243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2910243830Sdim                    SDValue(NewNode, 0));
2911276479Sdim        return nullptr;
2912198090Srdivacky      }
2913193323Sed    }
2914198090Srdivacky    break;
2915193323Sed  }
2916234353Sdim  case ISD::STORE: {
2917234353Sdim    // Change a chain of {load; incr or dec; store} of the same value into
2918234353Sdim    // a simple increment or decrement through memory of that value, if the
2919234353Sdim    // uses of the modified value and its address are suitable.
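    // For example, a load from [addr], an increment of that value, and a store
    // back to [addr] collapse into a single memory-form INC (e.g. INC64m).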
2920234353Sdim    // The DEC64m tablegen pattern is currently not able to match the case where
2921239462Sdim    // the EFLAGS on the original DEC are used. (This also applies to
2922234353Sdim    // {INC,DEC}X{64,32,16,8}.)
2923234353Sdim    // We'll need to improve tablegen to allow flags to be transferred from a
2924234353Sdim    // node in the pattern to the result node, probably with a new keyword.
2925234353Sdim    // For example, we currently have this:
2926234353Sdim    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2927234353Sdim    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2928234353Sdim    //   (implicit EFLAGS)]>;
2929234353Sdim    // but we may need something like this:
2930234353Sdim    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2931234353Sdim    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2932234353Sdim    //   (transferrable EFLAGS)]>;
2933234353Sdim
2934234353Sdim    StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2935234353Sdim    SDValue StoredVal = StoreNode->getOperand(1);
2936234353Sdim    unsigned Opc = StoredVal->getOpcode();
2937234353Sdim
2938276479Sdim    LoadSDNode *LoadNode = nullptr;
2939234353Sdim    SDValue InputChain;
2940234353Sdim    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
2941234353Sdim                             LoadNode, InputChain))
2942234353Sdim      break;
2943234353Sdim
2944234353Sdim    SDValue Base, Scale, Index, Disp, Segment;
2945296417Sdim    if (!selectAddr(LoadNode, LoadNode->getBasePtr(),
2946234353Sdim                    Base, Scale, Index, Disp, Segment))
2947234353Sdim      break;
2948234353Sdim
2949234353Sdim    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2950234353Sdim    MemOp[0] = StoreNode->getMemOperand();
2951234353Sdim    MemOp[1] = LoadNode->getMemOperand();
2952234353Sdim    const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
2953239462Sdim    EVT LdVT = LoadNode->getMemoryVT();
2954234353Sdim    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
2955234353Sdim    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
2956261991Sdim                                                   SDLoc(Node),
2957251662Sdim                                                   MVT::i32, MVT::Other, Ops);
2958234353Sdim    Result->setMemRefs(MemOp, MemOp + 2);
2959234353Sdim
2960234353Sdim    ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2961234353Sdim    ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2962234353Sdim
2963234353Sdim    return Result;
2964198090Srdivacky  }
2965234353Sdim  }
2966193323Sed
2967202375Srdivacky  SDNode *ResNode = SelectCode(Node);
2968193323Sed
2969204642Srdivacky  DEBUG(dbgs() << "=> ";
2970276479Sdim        if (ResNode == nullptr || ResNode == Node)
2971204642Srdivacky          Node->dump(CurDAG);
2972204642Srdivacky        else
2973204642Srdivacky          ResNode->dump(CurDAG);
2974204642Srdivacky        dbgs() << '\n');
2975193323Sed
2976193323Sed  return ResNode;
2977193323Sed}
2978193323Sed
2979193323Sedbool X86DAGToDAGISel::
2980288943SdimSelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
2981193323Sed                             std::vector<SDValue> &OutOps) {
2982193323Sed  SDValue Op0, Op1, Op2, Op3, Op4;
2983288943Sdim  switch (ConstraintID) {
2984288943Sdim  default:
2985288943Sdim    llvm_unreachable("Unexpected asm memory constraint");
2986288943Sdim  case InlineAsm::Constraint_i:
2987288943Sdim    // FIXME: It seems strange that 'i' is needed here since it's supposed to
2988288943Sdim    //        be an immediate and not a memory constraint.
2989288943Sdim    // Fallthrough.
2990288943Sdim  case InlineAsm::Constraint_o: // offsetable        ??
2991288943Sdim  case InlineAsm::Constraint_v: // not offsetable    ??
2992288943Sdim  case InlineAsm::Constraint_m: // memory
2993288943Sdim  case InlineAsm::Constraint_X:
2994296417Sdim    if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
2995193323Sed      return true;
2996193323Sed    break;
2997193323Sed  }
2998239462Sdim
2999193323Sed  OutOps.push_back(Op0);
3000193323Sed  OutOps.push_back(Op1);
3001193323Sed  OutOps.push_back(Op2);
3002193323Sed  OutOps.push_back(Op3);
3003193323Sed  OutOps.push_back(Op4);
3004193323Sed  return false;
3005193323Sed}
3006193323Sed
3007296417Sdim/// This pass converts a legalized DAG into an X86-specific DAG,
3008296417Sdim/// ready for instruction scheduling.
3009193323SedFunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3010234353Sdim                                     CodeGenOpt::Level OptLevel) {
3011193323Sed  return new X86DAGToDAGISel(TM, OptLevel);
3012193323Sed}
3013