X86ISelDAGToDAG.cpp revision 276479
1193323Sed//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file defines a DAG pattern matching instruction selector for X86,
11193323Sed// converting from a legalized dag to a X86 dag.
12193323Sed//
13193323Sed//===----------------------------------------------------------------------===//
14193323Sed
15193323Sed#include "X86.h"
16193323Sed#include "X86InstrBuilder.h"
17193323Sed#include "X86MachineFunctionInfo.h"
18193323Sed#include "X86RegisterInfo.h"
19193323Sed#include "X86Subtarget.h"
20193323Sed#include "X86TargetMachine.h"
21249423Sdim#include "llvm/ADT/Statistic.h"
22249423Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
23193323Sed#include "llvm/CodeGen/MachineFunction.h"
24193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h"
25193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h"
26193323Sed#include "llvm/CodeGen/SelectionDAGISel.h"
27249423Sdim#include "llvm/IR/Instructions.h"
28249423Sdim#include "llvm/IR/Intrinsics.h"
29249423Sdim#include "llvm/IR/Type.h"
30193323Sed#include "llvm/Support/Debug.h"
31198090Srdivacky#include "llvm/Support/ErrorHandling.h"
32193323Sed#include "llvm/Support/MathExtras.h"
33198090Srdivacky#include "llvm/Support/raw_ostream.h"
34249423Sdim#include "llvm/Target/TargetMachine.h"
35249423Sdim#include "llvm/Target/TargetOptions.h"
36193323Sedusing namespace llvm;
37193323Sed
38276479Sdim#define DEBUG_TYPE "x86-isel"
39276479Sdim
40193323SedSTATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
41193323Sed
42193323Sed//===----------------------------------------------------------------------===//
43193323Sed//                      Pattern Matcher Implementation
44193323Sed//===----------------------------------------------------------------------===//
45193323Sed
46193323Sednamespace {
47193323Sed  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
48193323Sed  /// SDValue's instead of register numbers for the leaves of the matched
49193323Sed  /// tree.
50193323Sed  struct X86ISelAddressMode {
51193323Sed    enum {
52193323Sed      RegBase,
53193323Sed      FrameIndexBase
54193323Sed    } BaseType;
55193323Sed
56207618Srdivacky    // This is really a union, discriminated by BaseType!
57207618Srdivacky    SDValue Base_Reg;
58207618Srdivacky    int Base_FrameIndex;
59193323Sed
60193323Sed    unsigned Scale;
61239462Sdim    SDValue IndexReg;
62193323Sed    int32_t Disp;
63193323Sed    SDValue Segment;
64207618Srdivacky    const GlobalValue *GV;
65207618Srdivacky    const Constant *CP;
66207618Srdivacky    const BlockAddress *BlockAddr;
67193323Sed    const char *ES;
68193323Sed    int JT;
69193323Sed    unsigned Align;    // CP alignment.
70195098Sed    unsigned char SymbolFlags;  // X86II::MO_*
71193323Sed
72193323Sed    X86ISelAddressMode()
73207618Srdivacky      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
74276479Sdim        Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
75276479Sdim        JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
76193323Sed    }
77193323Sed
78193323Sed    bool hasSymbolicDisplacement() const {
79276479Sdim      return GV != nullptr || CP != nullptr || ES != nullptr ||
80276479Sdim             JT != -1 || BlockAddr != nullptr;
81193323Sed    }
82239462Sdim
83195098Sed    bool hasBaseOrIndexReg() const {
84261991Sdim      return BaseType == FrameIndexBase ||
85276479Sdim             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
86195098Sed    }
87239462Sdim
88195098Sed    /// isRIPRelative - Return true if this addressing mode is already RIP
89195098Sed    /// relative.
90195098Sed    bool isRIPRelative() const {
91195098Sed      if (BaseType != RegBase) return false;
92195098Sed      if (RegisterSDNode *RegNode =
93207618Srdivacky            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
94195098Sed        return RegNode->getReg() == X86::RIP;
95195098Sed      return false;
96195098Sed    }
97239462Sdim
98195098Sed    void setBaseReg(SDValue Reg) {
99195098Sed      BaseType = RegBase;
100207618Srdivacky      Base_Reg = Reg;
101195098Sed    }
102193323Sed
103243830Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
104193323Sed    void dump() {
105202375Srdivacky      dbgs() << "X86ISelAddressMode " << this << '\n';
106207618Srdivacky      dbgs() << "Base_Reg ";
107276479Sdim      if (Base_Reg.getNode())
108239462Sdim        Base_Reg.getNode()->dump();
109198090Srdivacky      else
110202375Srdivacky        dbgs() << "nul";
111207618Srdivacky      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
112198090Srdivacky             << " Scale" << Scale << '\n'
113198090Srdivacky             << "IndexReg ";
114276479Sdim      if (IndexReg.getNode())
115198090Srdivacky        IndexReg.getNode()->dump();
116198090Srdivacky      else
117239462Sdim        dbgs() << "nul";
118202375Srdivacky      dbgs() << " Disp " << Disp << '\n'
119198090Srdivacky             << "GV ";
120198090Srdivacky      if (GV)
121198090Srdivacky        GV->dump();
122198090Srdivacky      else
123202375Srdivacky        dbgs() << "nul";
124202375Srdivacky      dbgs() << " CP ";
125198090Srdivacky      if (CP)
126198090Srdivacky        CP->dump();
127198090Srdivacky      else
128202375Srdivacky        dbgs() << "nul";
129202375Srdivacky      dbgs() << '\n'
130198090Srdivacky             << "ES ";
131198090Srdivacky      if (ES)
132202375Srdivacky        dbgs() << ES;
133198090Srdivacky      else
134202375Srdivacky        dbgs() << "nul";
135202375Srdivacky      dbgs() << " JT" << JT << " Align" << Align << '\n';
136193323Sed    }
137243830Sdim#endif
138193323Sed  };
139193323Sed}
140193323Sed
141193323Sednamespace {
142193323Sed  //===--------------------------------------------------------------------===//
143193323Sed  /// ISel - X86 specific code to select X86 machine instructions for
144193323Sed  /// SelectionDAG operations.
145193323Sed  ///
146276479Sdim  class X86DAGToDAGISel final : public SelectionDAGISel {
147193323Sed    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
148193323Sed    /// make the right decision when generating code for different targets.
149193323Sed    const X86Subtarget *Subtarget;
150193323Sed
151193323Sed    /// OptForSize - If true, selector should try to optimize for code size
152193323Sed    /// instead of performance.
153193323Sed    bool OptForSize;
154193323Sed
155193323Sed  public:
156193323Sed    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
157193323Sed      : SelectionDAGISel(tm, OptLevel),
158193399Sed        Subtarget(&tm.getSubtarget<X86Subtarget>()),
159193323Sed        OptForSize(false) {}
160193323Sed
161276479Sdim    const char *getPassName() const override {
162193323Sed      return "X86 DAG->DAG Instruction Selection";
163193323Sed    }
164193323Sed
165276479Sdim    bool runOnMachineFunction(MachineFunction &MF) override {
166276479Sdim      // Reset the subtarget each time through.
167276479Sdim      Subtarget = &TM.getSubtarget<X86Subtarget>();
168276479Sdim      SelectionDAGISel::runOnMachineFunction(MF);
169276479Sdim      return true;
170276479Sdim    }
171193323Sed
172276479Sdim    void EmitFunctionEntryCode() override;
173193323Sed
174276479Sdim    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
175203954Srdivacky
176276479Sdim    void PreprocessISelDAG() override;
177276479Sdim
178212904Sdim    inline bool immSext8(SDNode *N) const {
179212904Sdim      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
180212904Sdim    }
181212904Sdim
182212904Sdim    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
183212904Sdim    // sign extended field.
184212904Sdim    inline bool i64immSExt32(SDNode *N) const {
185212904Sdim      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
186212904Sdim      return (int64_t)v == (int32_t)v;
187212904Sdim    }
188212904Sdim
189193323Sed// Include the pieces autogenerated from the target description.
190193323Sed#include "X86GenDAGISel.inc"
191193323Sed
192193323Sed  private:
193276479Sdim    SDNode *Select(SDNode *N) override;
194239462Sdim    SDNode *SelectGather(SDNode *N, unsigned Opc);
195193323Sed    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
196261991Sdim    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
197193323Sed
198224145Sdim    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
199218893Sdim    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
200193323Sed    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
201198090Srdivacky    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
202198090Srdivacky    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
203198090Srdivacky                                 unsigned Depth);
204193323Sed    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
205218893Sdim    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
206193323Sed                    SDValue &Scale, SDValue &Index, SDValue &Disp,
207193323Sed                    SDValue &Segment);
208261991Sdim    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
209218893Sdim    bool SelectLEAAddr(SDValue N, SDValue &Base,
210210299Sed                       SDValue &Scale, SDValue &Index, SDValue &Disp,
211210299Sed                       SDValue &Segment);
212261991Sdim    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
213261991Sdim                            SDValue &Scale, SDValue &Index, SDValue &Disp,
214261991Sdim                            SDValue &Segment);
215218893Sdim    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
216210299Sed                           SDValue &Scale, SDValue &Index, SDValue &Disp,
217210299Sed                           SDValue &Segment);
218204642Srdivacky    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
219204642Srdivacky                             SDValue &Base, SDValue &Scale,
220193323Sed                             SDValue &Index, SDValue &Disp,
221193323Sed                             SDValue &Segment,
222204642Srdivacky                             SDValue &NodeWithChain);
223239462Sdim
224202375Srdivacky    bool TryFoldLoad(SDNode *P, SDValue N,
225193323Sed                     SDValue &Base, SDValue &Scale,
226193323Sed                     SDValue &Index, SDValue &Disp,
227193323Sed                     SDValue &Segment);
228239462Sdim
229193323Sed    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
230193323Sed    /// inline asm expressions.
231276479Sdim    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
232276479Sdim                                      char ConstraintCode,
233276479Sdim                                      std::vector<SDValue> &OutOps) override;
234239462Sdim
235193323Sed    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
236193323Sed
237239462Sdim    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
238193323Sed                                   SDValue &Scale, SDValue &Index,
239193323Sed                                   SDValue &Disp, SDValue &Segment) {
240193323Sed      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
241261991Sdim        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
242261991Sdim                                    getTargetLowering()->getPointerTy()) :
243207618Srdivacky        AM.Base_Reg;
244193323Sed      Scale = getI8Imm(AM.Scale);
245193323Sed      Index = AM.IndexReg;
246193323Sed      // These are 32-bit even in 64-bit mode since RIP relative offset
247193323Sed      // is 32-bit.
248193323Sed      if (AM.GV)
249261991Sdim        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
250210299Sed                                              MVT::i32, AM.Disp,
251195098Sed                                              AM.SymbolFlags);
252193323Sed      else if (AM.CP)
253193323Sed        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
254195098Sed                                             AM.Align, AM.Disp, AM.SymbolFlags);
255243830Sdim      else if (AM.ES) {
256243830Sdim        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
257195098Sed        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
258243830Sdim      } else if (AM.JT != -1) {
259243830Sdim        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
260195098Sed        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
261243830Sdim      } else if (AM.BlockAddr)
262243830Sdim        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
263243830Sdim                                             AM.SymbolFlags);
264193323Sed      else
265193323Sed        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
266193323Sed
267193323Sed      if (AM.Segment.getNode())
268193323Sed        Segment = AM.Segment;
269193323Sed      else
270193323Sed        Segment = CurDAG->getRegister(0, MVT::i32);
271193323Sed    }
272193323Sed
273193323Sed    /// getI8Imm - Return a target constant with the specified value, of type
274193323Sed    /// i8.
275193323Sed    inline SDValue getI8Imm(unsigned Imm) {
276193323Sed      return CurDAG->getTargetConstant(Imm, MVT::i8);
277193323Sed    }
278193323Sed
279193323Sed    /// getI32Imm - Return a target constant with the specified value, of type
280193323Sed    /// i32.
281193323Sed    inline SDValue getI32Imm(unsigned Imm) {
282193323Sed      return CurDAG->getTargetConstant(Imm, MVT::i32);
283193323Sed    }
284193323Sed
285193323Sed    /// getGlobalBaseReg - Return an SDNode that returns the value of
286193323Sed    /// the global base register. Output instructions required to
287193323Sed    /// initialize the global base register, if necessary.
288193323Sed    ///
289193323Sed    SDNode *getGlobalBaseReg();
290193323Sed
291193399Sed    /// getTargetMachine - Return a reference to the TargetMachine, casted
292193399Sed    /// to the target-specific type.
293249423Sdim    const X86TargetMachine &getTargetMachine() const {
294193399Sed      return static_cast<const X86TargetMachine &>(TM);
295193399Sed    }
296193399Sed
297193399Sed    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
298193399Sed    /// to the target-specific type.
299249423Sdim    const X86InstrInfo *getInstrInfo() const {
300193399Sed      return getTargetMachine().getInstrInfo();
301193399Sed    }
302193323Sed  };
303193323Sed}
304193323Sed
305193323Sed
306203954Srdivackybool
307203954SrdivackyX86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
308193323Sed  if (OptLevel == CodeGenOpt::None) return false;
309193323Sed
310203954Srdivacky  if (!N.hasOneUse())
311203954Srdivacky    return false;
312203954Srdivacky
313203954Srdivacky  if (N.getOpcode() != ISD::LOAD)
314203954Srdivacky    return true;
315203954Srdivacky
316203954Srdivacky  // If N is a load, do additional profitability checks.
317203954Srdivacky  if (U == Root) {
318193323Sed    switch (U->getOpcode()) {
319193323Sed    default: break;
320202375Srdivacky    case X86ISD::ADD:
321202375Srdivacky    case X86ISD::SUB:
322202375Srdivacky    case X86ISD::AND:
323202375Srdivacky    case X86ISD::XOR:
324202375Srdivacky    case X86ISD::OR:
325193323Sed    case ISD::ADD:
326193323Sed    case ISD::ADDC:
327193323Sed    case ISD::ADDE:
328193323Sed    case ISD::AND:
329193323Sed    case ISD::OR:
330193323Sed    case ISD::XOR: {
331193323Sed      SDValue Op1 = U->getOperand(1);
332193323Sed
333193323Sed      // If the other operand is a 8-bit immediate we should fold the immediate
334193323Sed      // instead. This reduces code size.
335193323Sed      // e.g.
336193323Sed      // movl 4(%esp), %eax
337193323Sed      // addl $4, %eax
338193323Sed      // vs.
339193323Sed      // movl $4, %eax
340193323Sed      // addl 4(%esp), %eax
341193323Sed      // The former is 2 bytes shorter. In case where the increment is 1, then
342193323Sed      // the saving can be 4 bytes (by using incl %eax).
343193323Sed      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
344193323Sed        if (Imm->getAPIntValue().isSignedIntN(8))
345193323Sed          return false;
346193323Sed
347193323Sed      // If the other operand is a TLS address, we should fold it instead.
348193323Sed      // This produces
349193323Sed      // movl    %gs:0, %eax
350193323Sed      // leal    i@NTPOFF(%eax), %eax
351193323Sed      // instead of
352193323Sed      // movl    $i@NTPOFF, %eax
353193323Sed      // addl    %gs:0, %eax
354193323Sed      // if the block also has an access to a second TLS address this will save
355193323Sed      // a load.
356276479Sdim      // FIXME: This is probably also true for non-TLS addresses.
357193323Sed      if (Op1.getOpcode() == X86ISD::Wrapper) {
358193323Sed        SDValue Val = Op1.getOperand(0);
359193323Sed        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
360193323Sed          return false;
361193323Sed      }
362193323Sed    }
363193323Sed    }
364203954Srdivacky  }
365193323Sed
366203954Srdivacky  return true;
367203954Srdivacky}
368203954Srdivacky
369205218Srdivacky/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
370205218Srdivacky/// load's chain operand and move load below the call's chain operand.
371205218Srdivackystatic void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
372243830Sdim                               SDValue Call, SDValue OrigChain) {
373193323Sed  SmallVector<SDValue, 8> Ops;
374205218Srdivacky  SDValue Chain = OrigChain.getOperand(0);
375193323Sed  if (Chain.getNode() == Load.getNode())
376193323Sed    Ops.push_back(Load.getOperand(0));
377193323Sed  else {
378193323Sed    assert(Chain.getOpcode() == ISD::TokenFactor &&
379205218Srdivacky           "Unexpected chain operand");
380193323Sed    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
381193323Sed      if (Chain.getOperand(i).getNode() == Load.getNode())
382193323Sed        Ops.push_back(Load.getOperand(0));
383193323Sed      else
384193323Sed        Ops.push_back(Chain.getOperand(i));
385193323Sed    SDValue NewChain =
386276479Sdim      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
387193323Sed    Ops.clear();
388193323Sed    Ops.push_back(NewChain);
389193323Sed  }
390205218Srdivacky  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
391205218Srdivacky    Ops.push_back(OrigChain.getOperand(i));
392276479Sdim  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
393210299Sed  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
394193323Sed                             Load.getOperand(1), Load.getOperand(2));
395243830Sdim
396243830Sdim  unsigned NumOps = Call.getNode()->getNumOperands();
397193323Sed  Ops.clear();
398193323Sed  Ops.push_back(SDValue(Load.getNode(), 1));
399243830Sdim  for (unsigned i = 1, e = NumOps; i != e; ++i)
400193323Sed    Ops.push_back(Call.getOperand(i));
401276479Sdim  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
402193323Sed}
403193323Sed
404193323Sed/// isCalleeLoad - Return true if call address is a load and it can be
405193323Sed/// moved below CALLSEQ_START and the chains leading up to the call.
406193323Sed/// Return the CALLSEQ_START by reference as a second output.
407205218Srdivacky/// In the case of a tail call, there isn't a callseq node between the call
408205218Srdivacky/// chain and the load.
409205218Srdivackystatic bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
410243830Sdim  // The transformation is somewhat dangerous if the call's chain was glued to
411243830Sdim  // the call. After MoveBelowOrigChain the load is moved between the call and
412243830Sdim  // the chain, this can create a cycle if the load is not folded. So it is
413243830Sdim  // *really* important that we are sure the load will be folded.
414193323Sed  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
415193323Sed    return false;
416193323Sed  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
417193323Sed  if (!LD ||
418193323Sed      LD->isVolatile() ||
419193323Sed      LD->getAddressingMode() != ISD::UNINDEXED ||
420193323Sed      LD->getExtensionType() != ISD::NON_EXTLOAD)
421193323Sed    return false;
422193323Sed
423193323Sed  // Now let's find the callseq_start.
424205218Srdivacky  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
425193323Sed    if (!Chain.hasOneUse())
426193323Sed      return false;
427193323Sed    Chain = Chain.getOperand(0);
428193323Sed  }
429205218Srdivacky
430205218Srdivacky  if (!Chain.getNumOperands())
431205218Srdivacky    return false;
432249423Sdim  // Since we are not checking for AA here, conservatively abort if the chain
433249423Sdim  // writes to memory. It's not safe to move the callee (a load) across a store.
434249423Sdim  if (isa<MemSDNode>(Chain.getNode()) &&
435249423Sdim      cast<MemSDNode>(Chain.getNode())->writeMem())
436249423Sdim    return false;
437193323Sed  if (Chain.getOperand(0).getNode() == Callee.getNode())
438193323Sed    return true;
439193323Sed  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
440198090Srdivacky      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
441198090Srdivacky      Callee.getValue(1).hasOneUse())
442193323Sed    return true;
443193323Sed  return false;
444193323Sed}
445193323Sed
446204642Srdivackyvoid X86DAGToDAGISel::PreprocessISelDAG() {
447204792Srdivacky  // OptForSize is used in pattern predicates that isel is matching.
448249423Sdim  OptForSize = MF->getFunction()->getAttributes().
449249423Sdim    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
450239462Sdim
451204642Srdivacky  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
452204642Srdivacky       E = CurDAG->allnodes_end(); I != E; ) {
453204642Srdivacky    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
454193323Sed
455205218Srdivacky    if (OptLevel != CodeGenOpt::None &&
456249423Sdim        // Only does this when target favors doesn't favor register indirect
457249423Sdim        // call.
458249423Sdim        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
459243830Sdim         (N->getOpcode() == X86ISD::TC_RETURN &&
460249423Sdim          // Only does this if load can be folded into TC_RETURN.
461243830Sdim          (Subtarget->is64Bit() ||
462243830Sdim           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
463193323Sed      /// Also try moving call address load from outside callseq_start to just
464193323Sed      /// before the call to allow it to be folded.
465193323Sed      ///
466193323Sed      ///     [Load chain]
467193323Sed      ///         ^
468193323Sed      ///         |
469193323Sed      ///       [Load]
470193323Sed      ///       ^    ^
471193323Sed      ///       |    |
472193323Sed      ///      /      \--
473193323Sed      ///     /          |
474193323Sed      ///[CALLSEQ_START] |
475193323Sed      ///     ^          |
476193323Sed      ///     |          |
477193323Sed      /// [LOAD/C2Reg]   |
478193323Sed      ///     |          |
479193323Sed      ///      \        /
480193323Sed      ///       \      /
481193323Sed      ///       [CALL]
482205218Srdivacky      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
483204642Srdivacky      SDValue Chain = N->getOperand(0);
484204642Srdivacky      SDValue Load  = N->getOperand(1);
485205218Srdivacky      if (!isCalleeLoad(Load, Chain, HasCallSeq))
486193323Sed        continue;
487205218Srdivacky      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
488193323Sed      ++NumLoadMoved;
489193323Sed      continue;
490193323Sed    }
491239462Sdim
492204642Srdivacky    // Lower fpround and fpextend nodes that target the FP stack to be store and
493204642Srdivacky    // load to the stack.  This is a gross hack.  We would like to simply mark
494204642Srdivacky    // these as being illegal, but when we do that, legalize produces these when
495204642Srdivacky    // it expands calls, then expands these in the same legalize pass.  We would
496204642Srdivacky    // like dag combine to be able to hack on these between the call expansion
497204642Srdivacky    // and the node legalization.  As such this pass basically does "really
498204642Srdivacky    // late" legalization of these inline with the X86 isel pass.
499204642Srdivacky    // FIXME: This should only happen when not compiled with -O0.
500193323Sed    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
501193323Sed      continue;
502239462Sdim
503261991Sdim    MVT SrcVT = N->getOperand(0).getSimpleValueType();
504261991Sdim    MVT DstVT = N->getSimpleValueType(0);
505226633Sdim
506226633Sdim    // If any of the sources are vectors, no fp stack involved.
507226633Sdim    if (SrcVT.isVector() || DstVT.isVector())
508226633Sdim      continue;
509226633Sdim
510193323Sed    // If the source and destination are SSE registers, then this is a legal
511193323Sed    // conversion that should not be lowered.
512261991Sdim    const X86TargetLowering *X86Lowering =
513261991Sdim        static_cast<const X86TargetLowering *>(getTargetLowering());
514261991Sdim    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
515261991Sdim    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
516193323Sed    if (SrcIsSSE && DstIsSSE)
517193323Sed      continue;
518193323Sed
519193323Sed    if (!SrcIsSSE && !DstIsSSE) {
520193323Sed      // If this is an FPStack extension, it is a noop.
521193323Sed      if (N->getOpcode() == ISD::FP_EXTEND)
522193323Sed        continue;
523193323Sed      // If this is a value-preserving FPStack truncation, it is a noop.
524193323Sed      if (N->getConstantOperandVal(1))
525193323Sed        continue;
526193323Sed    }
527239462Sdim
528193323Sed    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
529193323Sed    // FPStack has extload and truncstore.  SSE can fold direct loads into other
530193323Sed    // operations.  Based on this, decide what we want to do.
531261991Sdim    MVT MemVT;
532193323Sed    if (N->getOpcode() == ISD::FP_ROUND)
533193323Sed      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
534193323Sed    else
535193323Sed      MemVT = SrcIsSSE ? SrcVT : DstVT;
536239462Sdim
537193323Sed    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
538261991Sdim    SDLoc dl(N);
539239462Sdim
540193323Sed    // FIXME: optimize the case where the src/dest is a load or store?
541193323Sed    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
542193323Sed                                          N->getOperand(0),
543218893Sdim                                          MemTmp, MachinePointerInfo(), MemVT,
544203954Srdivacky                                          false, false, 0);
545218893Sdim    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
546218893Sdim                                        MachinePointerInfo(),
547218893Sdim                                        MemVT, false, false, 0);
548193323Sed
549193323Sed    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
550193323Sed    // extload we created.  This will cause general havok on the dag because
551193323Sed    // anything below the conversion could be folded into other existing nodes.
552193323Sed    // To avoid invalidating 'I', back it up to the convert node.
553193323Sed    --I;
554193323Sed    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
555239462Sdim
556193323Sed    // Now that we did that, the node is dead.  Increment the iterator to the
557193323Sed    // next node to process, then delete N.
558193323Sed    ++I;
559193323Sed    CurDAG->DeleteNode(N);
560239462Sdim  }
561193323Sed}
562193323Sed
563193323Sed
564193323Sed/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
565193323Sed/// the main function.
566193323Sedvoid X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
567193323Sed                                             MachineFrameInfo *MFI) {
568193323Sed  const TargetInstrInfo *TII = TM.getInstrInfo();
569218893Sdim  if (Subtarget->isTargetCygMing()) {
570218893Sdim    unsigned CallOp =
571234353Sdim      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
572206124Srdivacky    BuildMI(BB, DebugLoc(),
573218893Sdim            TII->get(CallOp)).addExternalSymbol("__main");
574218893Sdim  }
575193323Sed}
576193323Sed
577207618Srdivackyvoid X86DAGToDAGISel::EmitFunctionEntryCode() {
578193323Sed  // If this is main, emit special code for main.
579207618Srdivacky  if (const Function *Fn = MF->getFunction())
580207618Srdivacky    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
581207618Srdivacky      EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
582193323Sed}
583193323Sed
/// isDispSafeForFrameIndex - Return true if displacement Val is safe to use
/// together with a frame-index base.
///
/// On 64-bit platforms, we can run into an issue where a frame index
/// includes a displacement that, when added to the explicit displacement,
/// will overflow the displacement field. Assuming that the frame index
/// displacement fits into a 31-bit integer (which is only slightly more
/// aggressive than the current fundamental assumption that it fits into
/// a 32-bit integer), a 31-bit disp should always be safe.
static bool isDispSafeForFrameIndex(int64_t Val) {
  // Equivalent to isInt<31>(Val): -2^30 <= Val <= 2^30 - 1.
  const int64_t Bound = int64_t(1) << 30;
  return Val >= -Bound && Val < Bound;
}
593193323Sed
594224145Sdimbool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
595224145Sdim                                            X86ISelAddressMode &AM) {
596224145Sdim  int64_t Val = AM.Disp + Offset;
597224145Sdim  CodeModel::Model M = TM.getCodeModel();
598224145Sdim  if (Subtarget->is64Bit()) {
599224145Sdim    if (!X86::isOffsetSuitableForCodeModel(Val, M,
600224145Sdim                                           AM.hasSymbolicDisplacement()))
601224145Sdim      return true;
602224145Sdim    // In addition to the checks required for a register base, check that
603224145Sdim    // we do not try to use an unsafe Disp with a frame index.
604224145Sdim    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
605224145Sdim        !isDispSafeForFrameIndex(Val))
606224145Sdim      return true;
607224145Sdim  }
608224145Sdim  AM.Disp = Val;
609224145Sdim  return false;
610224145Sdim
611224145Sdim}
612224145Sdim
613218893Sdimbool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
614218893Sdim  SDValue Address = N->getOperand(1);
615239462Sdim
616218893Sdim  // load gs:0 -> GS segment register.
617218893Sdim  // load fs:0 -> FS segment register.
618218893Sdim  //
619193323Sed  // This optimization is valid because the GNU TLS model defines that
620193323Sed  // gs:0 (or fs:0 on X86-64) contains its own address.
621193323Sed  // For more information see http://people.redhat.com/drepper/tls.pdf
622218893Sdim  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
623276479Sdim    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
624239462Sdim        Subtarget->isTargetLinux())
625218893Sdim      switch (N->getPointerInfo().getAddrSpace()) {
626218893Sdim      case 256:
627218893Sdim        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
628218893Sdim        return false;
629218893Sdim      case 257:
630218893Sdim        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
631218893Sdim        return false;
632218893Sdim      }
633239462Sdim
634193323Sed  return true;
635193323Sed}
636193323Sed
637195098Sed/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
638195098Sed/// into an addressing mode.  These wrap things that will resolve down into a
639195098Sed/// symbol reference.  If no match is possible, this returns true, otherwise it
640198090Srdivacky/// returns false.
641193323Sedbool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
642195098Sed  // If the addressing mode already has a symbol as the displacement, we can
643195098Sed  // never match another symbol.
644193323Sed  if (AM.hasSymbolicDisplacement())
645193323Sed    return true;
646193323Sed
647193323Sed  SDValue N0 = N.getOperand(0);
648198090Srdivacky  CodeModel::Model M = TM.getCodeModel();
649198090Srdivacky
650195098Sed  // Handle X86-64 rip-relative addresses.  We check this before checking direct
651195098Sed  // folding because RIP is preferable to non-RIP accesses.
652234353Sdim  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
653195098Sed      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
654195098Sed      // they cannot be folded into immediate fields.
655195098Sed      // FIXME: This can be improved for kernel and other models?
656234353Sdim      (M == CodeModel::Small || M == CodeModel::Kernel)) {
657234353Sdim    // Base and index reg must be 0 in order to use %rip as base.
658234353Sdim    if (AM.hasBaseOrIndexReg())
659234353Sdim      return true;
660195098Sed    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
661224145Sdim      X86ISelAddressMode Backup = AM;
662195098Sed      AM.GV = G->getGlobal();
663195098Sed      AM.SymbolFlags = G->getTargetFlags();
664224145Sdim      if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
665224145Sdim        AM = Backup;
666224145Sdim        return true;
667224145Sdim      }
668195098Sed    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
669224145Sdim      X86ISelAddressMode Backup = AM;
670195098Sed      AM.CP = CP->getConstVal();
671195098Sed      AM.Align = CP->getAlignment();
672195098Sed      AM.SymbolFlags = CP->getTargetFlags();
673224145Sdim      if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
674224145Sdim        AM = Backup;
675224145Sdim        return true;
676224145Sdim      }
677195098Sed    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
678195098Sed      AM.ES = S->getSymbol();
679195098Sed      AM.SymbolFlags = S->getTargetFlags();
680198892Srdivacky    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
681195098Sed      AM.JT = J->getIndex();
682195098Sed      AM.SymbolFlags = J->getTargetFlags();
683243830Sdim    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
684243830Sdim      X86ISelAddressMode Backup = AM;
685243830Sdim      AM.BlockAddr = BA->getBlockAddress();
686243830Sdim      AM.SymbolFlags = BA->getTargetFlags();
687243830Sdim      if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
688243830Sdim        AM = Backup;
689243830Sdim        return true;
690243830Sdim      }
691243830Sdim    } else
692243830Sdim      llvm_unreachable("Unhandled symbol reference node.");
693198090Srdivacky
694195098Sed    if (N.getOpcode() == X86ISD::WrapperRIP)
695195098Sed      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
696195098Sed    return false;
697195098Sed  }
698195098Sed
699195098Sed  // Handle the case when globals fit in our immediate field: This is true for
700234353Sdim  // X86-32 always and X86-64 when in -mcmodel=small mode.  In 64-bit
701234353Sdim  // mode, this only applies to a non-RIP-relative computation.
702195098Sed  if (!Subtarget->is64Bit() ||
703234353Sdim      M == CodeModel::Small || M == CodeModel::Kernel) {
704234353Sdim    assert(N.getOpcode() != X86ISD::WrapperRIP &&
705234353Sdim           "RIP-relative addressing already handled");
706195098Sed    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
707195098Sed      AM.GV = G->getGlobal();
708195098Sed      AM.Disp += G->getOffset();
709195098Sed      AM.SymbolFlags = G->getTargetFlags();
710195098Sed    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
711193323Sed      AM.CP = CP->getConstVal();
712193323Sed      AM.Align = CP->getAlignment();
713195098Sed      AM.Disp += CP->getOffset();
714195098Sed      AM.SymbolFlags = CP->getTargetFlags();
715195098Sed    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
716195098Sed      AM.ES = S->getSymbol();
717195098Sed      AM.SymbolFlags = S->getTargetFlags();
718198892Srdivacky    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
719195098Sed      AM.JT = J->getIndex();
720195098Sed      AM.SymbolFlags = J->getTargetFlags();
721243830Sdim    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
722243830Sdim      AM.BlockAddr = BA->getBlockAddress();
723243830Sdim      AM.Disp += BA->getOffset();
724243830Sdim      AM.SymbolFlags = BA->getTargetFlags();
725243830Sdim    } else
726243830Sdim      llvm_unreachable("Unhandled symbol reference node.");
727193323Sed    return false;
728193323Sed  }
729193323Sed
730193323Sed  return true;
731193323Sed}
732193323Sed
733193323Sed/// MatchAddress - Add the specified node to the specified addressing mode,
734193323Sed/// returning true if it cannot be done.  This just pattern matches for the
735193323Sed/// addressing mode.
736198090Srdivackybool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
737210299Sed  if (MatchAddressRecursively(N, AM, 0))
738198090Srdivacky    return true;
739198090Srdivacky
740198090Srdivacky  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
741198090Srdivacky  // a smaller encoding and avoids a scaled-index.
742198090Srdivacky  if (AM.Scale == 2 &&
743198090Srdivacky      AM.BaseType == X86ISelAddressMode::RegBase &&
744276479Sdim      AM.Base_Reg.getNode() == nullptr) {
745207618Srdivacky    AM.Base_Reg = AM.IndexReg;
746198090Srdivacky    AM.Scale = 1;
747198090Srdivacky  }
748198090Srdivacky
749198090Srdivacky  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
750198090Srdivacky  // because it has a smaller encoding.
751198090Srdivacky  // TODO: Which other code models can use this?
752198090Srdivacky  if (TM.getCodeModel() == CodeModel::Small &&
753198090Srdivacky      Subtarget->is64Bit() &&
754198090Srdivacky      AM.Scale == 1 &&
755198090Srdivacky      AM.BaseType == X86ISelAddressMode::RegBase &&
756276479Sdim      AM.Base_Reg.getNode() == nullptr &&
757276479Sdim      AM.IndexReg.getNode() == nullptr &&
758198090Srdivacky      AM.SymbolFlags == X86II::MO_NO_FLAG &&
759198090Srdivacky      AM.hasSymbolicDisplacement())
760207618Srdivacky    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
761198090Srdivacky
762198090Srdivacky  return false;
763198090Srdivacky}
764198090Srdivacky
765234353Sdim// Insert a node into the DAG at least before the Pos node's position. This
766234353Sdim// will reposition the node as needed, and will assign it a node ID that is <=
767234353Sdim// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
768234353Sdim// IDs! The selection DAG must no longer depend on their uniqueness when this
769234353Sdim// is used.
770234353Sdimstatic void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
771234353Sdim  if (N.getNode()->getNodeId() == -1 ||
772234353Sdim      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
773234353Sdim    DAG.RepositionNode(Pos.getNode(), N.getNode());
774234353Sdim    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
775234353Sdim  }
776234353Sdim}
777234353Sdim
778234353Sdim// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
779234353Sdim// allows us to convert the shift and and into an h-register extract and
780234353Sdim// a scaled index. Returns false if the simplification is performed.
781234353Sdimstatic bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
782234353Sdim                                      uint64_t Mask,
783234353Sdim                                      SDValue Shift, SDValue X,
784234353Sdim                                      X86ISelAddressMode &AM) {
785234353Sdim  if (Shift.getOpcode() != ISD::SRL ||
786234353Sdim      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
787234353Sdim      !Shift.hasOneUse())
788234353Sdim    return true;
789234353Sdim
790234353Sdim  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
791234353Sdim  if (ScaleLog <= 0 || ScaleLog >= 4 ||
792234353Sdim      Mask != (0xffu << ScaleLog))
793234353Sdim    return true;
794234353Sdim
795261991Sdim  MVT VT = N.getSimpleValueType();
796261991Sdim  SDLoc DL(N);
797234353Sdim  SDValue Eight = DAG.getConstant(8, MVT::i8);
798234353Sdim  SDValue NewMask = DAG.getConstant(0xff, VT);
799234353Sdim  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
800234353Sdim  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
801234353Sdim  SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
802234353Sdim  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
803234353Sdim
804234353Sdim  // Insert the new nodes into the topological ordering. We must do this in
805234353Sdim  // a valid topological ordering as nothing is going to go back and re-sort
806234353Sdim  // these nodes. We continually insert before 'N' in sequence as this is
807234353Sdim  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
808234353Sdim  // hierarchy left to express.
809234353Sdim  InsertDAGNode(DAG, N, Eight);
810234353Sdim  InsertDAGNode(DAG, N, Srl);
811234353Sdim  InsertDAGNode(DAG, N, NewMask);
812234353Sdim  InsertDAGNode(DAG, N, And);
813234353Sdim  InsertDAGNode(DAG, N, ShlCount);
814234353Sdim  InsertDAGNode(DAG, N, Shl);
815234353Sdim  DAG.ReplaceAllUsesWith(N, Shl);
816234353Sdim  AM.IndexReg = And;
817234353Sdim  AM.Scale = (1 << ScaleLog);
818234353Sdim  return false;
819234353Sdim}
820234353Sdim
821234353Sdim// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
822234353Sdim// allows us to fold the shift into this addressing mode. Returns false if the
823234353Sdim// transform succeeded.
824234353Sdimstatic bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
825234353Sdim                                        uint64_t Mask,
826234353Sdim                                        SDValue Shift, SDValue X,
827234353Sdim                                        X86ISelAddressMode &AM) {
828234353Sdim  if (Shift.getOpcode() != ISD::SHL ||
829234353Sdim      !isa<ConstantSDNode>(Shift.getOperand(1)))
830234353Sdim    return true;
831234353Sdim
832234353Sdim  // Not likely to be profitable if either the AND or SHIFT node has more
833234353Sdim  // than one use (unless all uses are for address computation). Besides,
834234353Sdim  // isel mechanism requires their node ids to be reused.
835234353Sdim  if (!N.hasOneUse() || !Shift.hasOneUse())
836234353Sdim    return true;
837234353Sdim
838234353Sdim  // Verify that the shift amount is something we can fold.
839234353Sdim  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
840234353Sdim  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
841234353Sdim    return true;
842234353Sdim
843261991Sdim  MVT VT = N.getSimpleValueType();
844261991Sdim  SDLoc DL(N);
845234353Sdim  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
846234353Sdim  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
847234353Sdim  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
848234353Sdim
849234353Sdim  // Insert the new nodes into the topological ordering. We must do this in
850234353Sdim  // a valid topological ordering as nothing is going to go back and re-sort
851234353Sdim  // these nodes. We continually insert before 'N' in sequence as this is
852234353Sdim  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
853234353Sdim  // hierarchy left to express.
854234353Sdim  InsertDAGNode(DAG, N, NewMask);
855234353Sdim  InsertDAGNode(DAG, N, NewAnd);
856234353Sdim  InsertDAGNode(DAG, N, NewShift);
857234353Sdim  DAG.ReplaceAllUsesWith(N, NewShift);
858234353Sdim
859234353Sdim  AM.Scale = 1 << ShiftAmt;
860234353Sdim  AM.IndexReg = NewAnd;
861234353Sdim  return false;
862234353Sdim}
863234353Sdim
864234353Sdim// Implement some heroics to detect shifts of masked values where the mask can
865234353Sdim// be replaced by extending the shift and undoing that in the addressing mode
866234353Sdim// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
867234353Sdim// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
868234353Sdim// the addressing mode. This results in code such as:
869234353Sdim//
870234353Sdim//   int f(short *y, int *lookup_table) {
871234353Sdim//     ...
872234353Sdim//     return *y + lookup_table[*y >> 11];
873234353Sdim//   }
874234353Sdim//
875234353Sdim// Turning into:
876234353Sdim//   movzwl (%rdi), %eax
877234353Sdim//   movl %eax, %ecx
878234353Sdim//   shrl $11, %ecx
879234353Sdim//   addl (%rsi,%rcx,4), %eax
880234353Sdim//
881234353Sdim// Instead of:
882234353Sdim//   movzwl (%rdi), %eax
883234353Sdim//   movl %eax, %ecx
884234353Sdim//   shrl $9, %ecx
885234353Sdim//   andl $124, %rcx
886234353Sdim//   addl (%rsi,%rcx), %eax
887234353Sdim//
888234353Sdim// Note that this function assumes the mask is provided as a mask *after* the
889234353Sdim// value is shifted. The input chain may or may not match that, but computing
890234353Sdim// such a mask is trivial.
891234353Sdimstatic bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
892234353Sdim                                    uint64_t Mask,
893234353Sdim                                    SDValue Shift, SDValue X,
894234353Sdim                                    X86ISelAddressMode &AM) {
895234353Sdim  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
896234353Sdim      !isa<ConstantSDNode>(Shift.getOperand(1)))
897234353Sdim    return true;
898234353Sdim
899234353Sdim  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
900261991Sdim  unsigned MaskLZ = countLeadingZeros(Mask);
901261991Sdim  unsigned MaskTZ = countTrailingZeros(Mask);
902234353Sdim
903234353Sdim  // The amount of shift we're trying to fit into the addressing mode is taken
904234353Sdim  // from the trailing zeros of the mask.
905234353Sdim  unsigned AMShiftAmt = MaskTZ;
906234353Sdim
907234353Sdim  // There is nothing we can do here unless the mask is removing some bits.
908234353Sdim  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
909234353Sdim  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
910234353Sdim
911234353Sdim  // We also need to ensure that mask is a continuous run of bits.
912234353Sdim  if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
913234353Sdim
914234353Sdim  // Scale the leading zero count down based on the actual size of the value.
915234353Sdim  // Also scale it down based on the size of the shift.
916261991Sdim  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
917234353Sdim
918234353Sdim  // The final check is to ensure that any masked out high bits of X are
919234353Sdim  // already known to be zero. Otherwise, the mask has a semantic impact
920234353Sdim  // other than masking out a couple of low bits. Unfortunately, because of
921234353Sdim  // the mask, zero extensions will be removed from operands in some cases.
922234353Sdim  // This code works extra hard to look through extensions because we can
923234353Sdim  // replace them with zero extensions cheaply if necessary.
924234353Sdim  bool ReplacingAnyExtend = false;
925234353Sdim  if (X.getOpcode() == ISD::ANY_EXTEND) {
926261991Sdim    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
927261991Sdim                          X.getOperand(0).getSimpleValueType().getSizeInBits();
928234353Sdim    // Assume that we'll replace the any-extend with a zero-extend, and
929234353Sdim    // narrow the search to the extended value.
930234353Sdim    X = X.getOperand(0);
931234353Sdim    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
932234353Sdim    ReplacingAnyExtend = true;
933234353Sdim  }
934261991Sdim  APInt MaskedHighBits =
935261991Sdim    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
936234353Sdim  APInt KnownZero, KnownOne;
937276479Sdim  DAG.computeKnownBits(X, KnownZero, KnownOne);
938234353Sdim  if (MaskedHighBits != KnownZero) return true;
939234353Sdim
940234353Sdim  // We've identified a pattern that can be transformed into a single shift
941234353Sdim  // and an addressing mode. Make it so.
942261991Sdim  MVT VT = N.getSimpleValueType();
943234353Sdim  if (ReplacingAnyExtend) {
944234353Sdim    assert(X.getValueType() != VT);
945234353Sdim    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
946261991Sdim    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
947234353Sdim    InsertDAGNode(DAG, N, NewX);
948234353Sdim    X = NewX;
949234353Sdim  }
950261991Sdim  SDLoc DL(N);
951234353Sdim  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
952234353Sdim  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
953234353Sdim  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
954234353Sdim  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
955234353Sdim
956234353Sdim  // Insert the new nodes into the topological ordering. We must do this in
957234353Sdim  // a valid topological ordering as nothing is going to go back and re-sort
958234353Sdim  // these nodes. We continually insert before 'N' in sequence as this is
959234353Sdim  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
960234353Sdim  // hierarchy left to express.
961234353Sdim  InsertDAGNode(DAG, N, NewSRLAmt);
962234353Sdim  InsertDAGNode(DAG, N, NewSRL);
963234353Sdim  InsertDAGNode(DAG, N, NewSHLAmt);
964234353Sdim  InsertDAGNode(DAG, N, NewSHL);
965234353Sdim  DAG.ReplaceAllUsesWith(N, NewSHL);
966234353Sdim
967234353Sdim  AM.Scale = 1 << AMShiftAmt;
968234353Sdim  AM.IndexReg = NewSRL;
969234353Sdim  return false;
970234353Sdim}
971234353Sdim
972198090Srdivackybool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
973198090Srdivacky                                              unsigned Depth) {
974261991Sdim  SDLoc dl(N);
975198090Srdivacky  DEBUG({
976202375Srdivacky      dbgs() << "MatchAddress: ";
977198090Srdivacky      AM.dump();
978198090Srdivacky    });
979193323Sed  // Limit recursion.
980193323Sed  if (Depth > 5)
981193323Sed    return MatchAddressBase(N, AM);
982198090Srdivacky
983195098Sed  // If this is already a %rip relative address, we can only merge immediates
984195098Sed  // into it.  Instead of handling this in every case, we handle it here.
985193323Sed  // RIP relative addressing: %rip + 32-bit displacement!
986195098Sed  if (AM.isRIPRelative()) {
987195098Sed    // FIXME: JumpTable and ExternalSymbol address currently don't like
988195098Sed    // displacements.  It isn't very important, but this should be fixed for
989195098Sed    // consistency.
990195098Sed    if (!AM.ES && AM.JT != -1) return true;
991198090Srdivacky
992224145Sdim    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
993224145Sdim      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
994193323Sed        return false;
995193323Sed    return true;
996193323Sed  }
997193323Sed
998193323Sed  switch (N.getOpcode()) {
999193323Sed  default: break;
1000193323Sed  case ISD::Constant: {
1001193323Sed    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1002224145Sdim    if (!FoldOffsetIntoAddress(Val, AM))
1003193323Sed      return false;
1004193323Sed    break;
1005193323Sed  }
1006193323Sed
1007193323Sed  case X86ISD::Wrapper:
1008195098Sed  case X86ISD::WrapperRIP:
1009193323Sed    if (!MatchWrapper(N, AM))
1010193323Sed      return false;
1011193323Sed    break;
1012193323Sed
1013193323Sed  case ISD::LOAD:
1014218893Sdim    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
1015193323Sed      return false;
1016193323Sed    break;
1017193323Sed
1018193323Sed  case ISD::FrameIndex:
1019224145Sdim    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1020276479Sdim        AM.Base_Reg.getNode() == nullptr &&
1021224145Sdim        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1022193323Sed      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1023207618Srdivacky      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1024193323Sed      return false;
1025193323Sed    }
1026193323Sed    break;
1027193323Sed
1028193323Sed  case ISD::SHL:
1029276479Sdim    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1030193323Sed      break;
1031239462Sdim
1032193323Sed    if (ConstantSDNode
1033193323Sed          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
1034193323Sed      unsigned Val = CN->getZExtValue();
1035198090Srdivacky      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1036198090Srdivacky      // that the base operand remains free for further matching. If
1037198090Srdivacky      // the base doesn't end up getting used, a post-processing step
1038198090Srdivacky      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1039193323Sed      if (Val == 1 || Val == 2 || Val == 3) {
1040193323Sed        AM.Scale = 1 << Val;
1041193323Sed        SDValue ShVal = N.getNode()->getOperand(0);
1042193323Sed
1043193323Sed        // Okay, we know that we have a scale by now.  However, if the scaled
1044193323Sed        // value is an add of something and a constant, we can fold the
1045193323Sed        // constant into the disp field here.
1046218893Sdim        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1047193323Sed          AM.IndexReg = ShVal.getNode()->getOperand(0);
1048193323Sed          ConstantSDNode *AddVal =
1049193323Sed            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
1050243830Sdim          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1051224145Sdim          if (!FoldOffsetIntoAddress(Disp, AM))
1052224145Sdim            return false;
1053193323Sed        }
1054224145Sdim
1055224145Sdim        AM.IndexReg = ShVal;
1056193323Sed        return false;
1057193323Sed      }
1058249423Sdim    }
1059193323Sed    break;
1060193323Sed
1061234353Sdim  case ISD::SRL: {
1062234353Sdim    // Scale must not be used already.
1063276479Sdim    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1064234353Sdim
1065234353Sdim    SDValue And = N.getOperand(0);
1066234353Sdim    if (And.getOpcode() != ISD::AND) break;
1067234353Sdim    SDValue X = And.getOperand(0);
1068234353Sdim
1069234353Sdim    // We only handle up to 64-bit values here as those are what matter for
1070234353Sdim    // addressing mode optimizations.
1071261991Sdim    if (X.getSimpleValueType().getSizeInBits() > 64) break;
1072234353Sdim
1073234353Sdim    // The mask used for the transform is expected to be post-shift, but we
1074234353Sdim    // found the shift first so just apply the shift to the mask before passing
1075234353Sdim    // it down.
1076234353Sdim    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1077234353Sdim        !isa<ConstantSDNode>(And.getOperand(1)))
1078234353Sdim      break;
1079234353Sdim    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1080234353Sdim
1081234353Sdim    // Try to fold the mask and shift into the scale, and return false if we
1082234353Sdim    // succeed.
1083234353Sdim    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1084234353Sdim      return false;
1085234353Sdim    break;
1086234353Sdim  }
1087234353Sdim
1088193323Sed  case ISD::SMUL_LOHI:
1089193323Sed  case ISD::UMUL_LOHI:
1090193323Sed    // A mul_lohi where we need the low part can be folded as a plain multiply.
1091193323Sed    if (N.getResNo() != 0) break;
1092193323Sed    // FALL THROUGH
1093193323Sed  case ISD::MUL:
1094193323Sed  case X86ISD::MUL_IMM:
1095193323Sed    // X*[3,5,9] -> X+X*[2,4,8]
1096193323Sed    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1097276479Sdim        AM.Base_Reg.getNode() == nullptr &&
1098276479Sdim        AM.IndexReg.getNode() == nullptr) {
1099193323Sed      if (ConstantSDNode
1100193323Sed            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
1101193323Sed        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1102193323Sed            CN->getZExtValue() == 9) {
1103193323Sed          AM.Scale = unsigned(CN->getZExtValue())-1;
1104193323Sed
1105193323Sed          SDValue MulVal = N.getNode()->getOperand(0);
1106193323Sed          SDValue Reg;
1107193323Sed
1108193323Sed          // Okay, we know that we have a scale by now.  However, if the scaled
1109193323Sed          // value is an add of something and a constant, we can fold the
1110193323Sed          // constant into the disp field here.
1111193323Sed          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1112193323Sed              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
1113193323Sed            Reg = MulVal.getNode()->getOperand(0);
1114193323Sed            ConstantSDNode *AddVal =
1115193323Sed              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
1116224145Sdim            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1117224145Sdim            if (FoldOffsetIntoAddress(Disp, AM))
1118193323Sed              Reg = N.getNode()->getOperand(0);
1119193323Sed          } else {
1120193323Sed            Reg = N.getNode()->getOperand(0);
1121193323Sed          }
1122193323Sed
1123207618Srdivacky          AM.IndexReg = AM.Base_Reg = Reg;
1124193323Sed          return false;
1125193323Sed        }
1126193323Sed    }
1127193323Sed    break;
1128193323Sed
1129193323Sed  case ISD::SUB: {
1130193323Sed    // Given A-B, if A can be completely folded into the address and
1131193323Sed    // the index field with the index field unused, use -B as the index.
1132193323Sed    // This is a win if a has multiple parts that can be folded into
1133193323Sed    // the address. Also, this saves a mov if the base register has
1134193323Sed    // other uses, since it avoids a two-address sub instruction, however
1135193323Sed    // it costs an additional mov if the index register has other uses.
1136193323Sed
1137210299Sed    // Add an artificial use to this node so that we can keep track of
1138210299Sed    // it if it gets CSE'd with a different node.
1139210299Sed    HandleSDNode Handle(N);
1140210299Sed
1141193323Sed    // Test if the LHS of the sub can be folded.
1142193323Sed    X86ISelAddressMode Backup = AM;
1143210299Sed    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
1144193323Sed      AM = Backup;
1145193323Sed      break;
1146193323Sed    }
1147193323Sed    // Test if the index field is free for use.
1148195098Sed    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1149193323Sed      AM = Backup;
1150193323Sed      break;
1151193323Sed    }
1152205407Srdivacky
1153193323Sed    int Cost = 0;
1154210299Sed    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
1155193323Sed    // If the RHS involves a register with multiple uses, this
1156193323Sed    // transformation incurs an extra mov, due to the neg instruction
1157193323Sed    // clobbering its operand.
1158193323Sed    if (!RHS.getNode()->hasOneUse() ||
1159193323Sed        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1160193323Sed        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1161193323Sed        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1162193323Sed        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1163193323Sed         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
1164193323Sed      ++Cost;
1165193323Sed    // If the base is a register with multiple uses, this
1166193323Sed    // transformation may save a mov.
1167193323Sed    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
1168207618Srdivacky         AM.Base_Reg.getNode() &&
1169207618Srdivacky         !AM.Base_Reg.getNode()->hasOneUse()) ||
1170193323Sed        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1171193323Sed      --Cost;
1172193323Sed    // If the folded LHS was interesting, this transformation saves
1173193323Sed    // address arithmetic.
1174193323Sed    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1175193323Sed        ((AM.Disp != 0) && (Backup.Disp == 0)) +
1176193323Sed        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1177193323Sed      --Cost;
1178193323Sed    // If it doesn't look like it may be an overall win, don't do it.
1179193323Sed    if (Cost >= 0) {
1180193323Sed      AM = Backup;
1181193323Sed      break;
1182193323Sed    }
1183193323Sed
1184193323Sed    // Ok, the transformation is legal and appears profitable. Go for it.
1185193323Sed    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
1186193323Sed    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1187193323Sed    AM.IndexReg = Neg;
1188193323Sed    AM.Scale = 1;
1189193323Sed
1190193323Sed    // Insert the new nodes into the topological ordering.
1191234353Sdim    InsertDAGNode(*CurDAG, N, Zero);
1192234353Sdim    InsertDAGNode(*CurDAG, N, Neg);
1193193323Sed    return false;
1194193323Sed  }
1195193323Sed
1196193323Sed  case ISD::ADD: {
1197210299Sed    // Add an artificial use to this node so that we can keep track of
1198210299Sed    // it if it gets CSE'd with a different node.
1199210299Sed    HandleSDNode Handle(N);
1200210299Sed
1201193323Sed    X86ISelAddressMode Backup = AM;
1202218893Sdim    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1203218893Sdim        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1204210299Sed      return false;
1205210299Sed    AM = Backup;
1206239462Sdim
1207205407Srdivacky    // Try again after commuting the operands.
1208218893Sdim    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
1209218893Sdim        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1210210299Sed      return false;
1211193323Sed    AM = Backup;
1212193323Sed
1213193323Sed    // If we couldn't fold both operands into the address at the same time,
1214193323Sed    // see if we can just put each operand into a register and fold at least
1215193323Sed    // the add.
1216193323Sed    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1217207618Srdivacky        !AM.Base_Reg.getNode() &&
1218195098Sed        !AM.IndexReg.getNode()) {
1219218893Sdim      N = Handle.getValue();
1220218893Sdim      AM.Base_Reg = N.getOperand(0);
1221218893Sdim      AM.IndexReg = N.getOperand(1);
1222193323Sed      AM.Scale = 1;
1223193323Sed      return false;
1224193323Sed    }
1225218893Sdim    N = Handle.getValue();
1226193323Sed    break;
1227193323Sed  }
1228193323Sed
1229193323Sed  case ISD::OR:
1230193323Sed    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
1231218893Sdim    if (CurDAG->isBaseWithConstantOffset(N)) {
1232193323Sed      X86ISelAddressMode Backup = AM;
1233207618Srdivacky      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
1234205407Srdivacky
1235193323Sed      // Start with the LHS as an addr mode.
1236210299Sed      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1237224145Sdim          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
1238193323Sed        return false;
1239193323Sed      AM = Backup;
1240193323Sed    }
1241193323Sed    break;
1242239462Sdim
1243193323Sed  case ISD::AND: {
1244193323Sed    // Perform some heroic transforms on an and of a constant-count shift
1245193323Sed    // with a constant to enable use of the scaled offset field.
1246193323Sed
1247193323Sed    // Scale must not be used already.
1248276479Sdim    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1249193323Sed
1250234353Sdim    SDValue Shift = N.getOperand(0);
1251234353Sdim    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1252193323Sed    SDValue X = Shift.getOperand(0);
1253193323Sed
1254234353Sdim    // We only handle up to 64-bit values here as those are what matter for
1255234353Sdim    // addressing mode optimizations.
1256261991Sdim    if (X.getSimpleValueType().getSizeInBits() > 64) break;
1257193323Sed
1258234353Sdim    if (!isa<ConstantSDNode>(N.getOperand(1)))
1259234353Sdim      break;
1260234353Sdim    uint64_t Mask = N.getConstantOperandVal(1);
1261193323Sed
1262234353Sdim    // Try to fold the mask and shift into an extract and scale.
1263234353Sdim    if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1264234353Sdim      return false;
1265193323Sed
1266234353Sdim    // Try to fold the mask and shift directly into the scale.
1267234353Sdim    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1268234353Sdim      return false;
1269193323Sed
1270234353Sdim    // Try to swap the mask and shift to place shifts which can be done as
1271234353Sdim    // a scale on the outside of the mask.
1272234353Sdim    if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1273234353Sdim      return false;
1274234353Sdim    break;
1275193323Sed  }
1276193323Sed  }
1277193323Sed
1278193323Sed  return MatchAddressBase(N, AM);
1279193323Sed}
1280193323Sed
1281193323Sed/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1282193323Sed/// specified addressing mode without any further recursion.
1283193323Sedbool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1284193323Sed  // Is the base register already occupied?
1285207618Srdivacky  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1286193323Sed    // If so, check to see if the scale index register is set.
1287276479Sdim    if (!AM.IndexReg.getNode()) {
1288193323Sed      AM.IndexReg = N;
1289193323Sed      AM.Scale = 1;
1290193323Sed      return false;
1291193323Sed    }
1292193323Sed
1293193323Sed    // Otherwise, we cannot select it.
1294193323Sed    return true;
1295193323Sed  }
1296193323Sed
1297193323Sed  // Default, generate it as a register.
1298193323Sed  AM.BaseType = X86ISelAddressMode::RegBase;
1299207618Srdivacky  AM.Base_Reg = N;
1300193323Sed  return false;
1301193323Sed}
1302193323Sed
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched.  It
/// is always a load, store, atomic node, or null.  It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  // MatchAddress returns true on *failure*; in that case no addressing mode
  // could be formed for N and we report no match to the caller.
  if (MatchAddress(N, AM))
    return false;

  MVT VT = N.getSimpleValueType();
  // Fill any still-unset base/index slot with register 0, which stands for
  // "no register" in the emitted addressing-mode operands.
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}
1347193323Sed
/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternChainNode: this is the matched node that has a chain input and
///   output.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  // Pattern 1: (scalar_to_vector (load addr)) where the load is non-extending
  // and single-use, and folding it into Root is both profitable and legal.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      // Note: SelectAddr returning false means no addressing mode matched.
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  // Pattern 2: (vzext_movl (scalar_to_vector (load addr))) - all intermediate
  // nodes must be single-use so the whole chain can be folded away.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}
1392193323Sed
1393193323Sed
1394261991Sdimbool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
1395261991Sdim  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1396261991Sdim    uint64_t ImmVal = CN->getZExtValue();
1397261991Sdim    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
1398261991Sdim      return false;
1399261991Sdim
1400261991Sdim    Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
1401261991Sdim    return true;
1402261991Sdim  }
1403261991Sdim
1404261991Sdim  // In static codegen with small code model, we can get the address of a label
1405261991Sdim  // into a register with 'movl'. TableGen has already made sure we're looking
1406261991Sdim  // at a label of some kind.
1407261991Sdim  assert(N->getOpcode() == X86ISD::Wrapper &&
1408261991Sdim         "Unexpected node type for MOV32ri64");
1409261991Sdim  N = N.getOperand(0);
1410261991Sdim
1411261991Sdim  if (N->getOpcode() != ISD::TargetConstantPool &&
1412261991Sdim      N->getOpcode() != ISD::TargetJumpTable &&
1413261991Sdim      N->getOpcode() != ISD::TargetGlobalAddress &&
1414261991Sdim      N->getOpcode() != ISD::TargetExternalSymbol &&
1415261991Sdim      N->getOpcode() != ISD::TargetBlockAddress)
1416261991Sdim    return false;
1417261991Sdim
1418261991Sdim  Imm = N;
1419261991Sdim  return TM.getCodeModel() == CodeModel::Small;
1420261991Sdim}
1421261991Sdim
1422261991Sdimbool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
1423261991Sdim                                         SDValue &Scale, SDValue &Index,
1424261991Sdim                                         SDValue &Disp, SDValue &Segment) {
1425261991Sdim  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1426261991Sdim    return false;
1427261991Sdim
1428261991Sdim  SDLoc DL(N);
1429261991Sdim  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1430261991Sdim  if (RN && RN->getReg() == 0)
1431261991Sdim    Base = CurDAG->getRegister(0, MVT::i64);
1432261991Sdim  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
1433261991Sdim    // Base could already be %rip, particularly in the x32 ABI.
1434261991Sdim    Base = SDValue(CurDAG->getMachineNode(
1435261991Sdim                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1436261991Sdim                       CurDAG->getTargetConstant(0, MVT::i64),
1437261991Sdim                       Base,
1438261991Sdim                       CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
1439261991Sdim                   0);
1440261991Sdim  }
1441261991Sdim
1442261991Sdim  RN = dyn_cast<RegisterSDNode>(Index);
1443261991Sdim  if (RN && RN->getReg() == 0)
1444261991Sdim    Index = CurDAG->getRegister(0, MVT::i64);
1445261991Sdim  else {
1446261991Sdim    assert(Index.getValueType() == MVT::i32 &&
1447261991Sdim           "Expect to be extending 32-bit registers for use in LEA");
1448261991Sdim    Index = SDValue(CurDAG->getMachineNode(
1449261991Sdim                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1450261991Sdim                        CurDAG->getTargetConstant(0, MVT::i64),
1451261991Sdim                        Index,
1452261991Sdim                        CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
1453261991Sdim                    0);
1454261991Sdim  }
1455261991Sdim
1456261991Sdim  return true;
1457261991Sdim}
1458261991Sdim
1459193323Sed/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1460193323Sed/// mode it matches can be cost effectively emitted as an LEA instruction.
1461218893Sdimbool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
1462193323Sed                                    SDValue &Base, SDValue &Scale,
1463210299Sed                                    SDValue &Index, SDValue &Disp,
1464210299Sed                                    SDValue &Segment) {
1465193323Sed  X86ISelAddressMode AM;
1466193323Sed
1467193323Sed  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1468193323Sed  // segments.
1469193323Sed  SDValue Copy = AM.Segment;
1470193323Sed  SDValue T = CurDAG->getRegister(0, MVT::i32);
1471193323Sed  AM.Segment = T;
1472193323Sed  if (MatchAddress(N, AM))
1473193323Sed    return false;
1474193323Sed  assert (T == AM.Segment);
1475193323Sed  AM.Segment = Copy;
1476193323Sed
1477261991Sdim  MVT VT = N.getSimpleValueType();
1478193323Sed  unsigned Complexity = 0;
1479193323Sed  if (AM.BaseType == X86ISelAddressMode::RegBase)
1480207618Srdivacky    if (AM.Base_Reg.getNode())
1481193323Sed      Complexity = 1;
1482193323Sed    else
1483207618Srdivacky      AM.Base_Reg = CurDAG->getRegister(0, VT);
1484193323Sed  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1485193323Sed    Complexity = 4;
1486193323Sed
1487193323Sed  if (AM.IndexReg.getNode())
1488193323Sed    Complexity++;
1489193323Sed  else
1490193323Sed    AM.IndexReg = CurDAG->getRegister(0, VT);
1491193323Sed
1492193323Sed  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1493193323Sed  // a simple shift.
1494193323Sed  if (AM.Scale > 1)
1495193323Sed    Complexity++;
1496193323Sed
1497193323Sed  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1498193323Sed  // to a LEA. This is determined with some expermentation but is by no means
1499193323Sed  // optimal (especially for code size consideration). LEA is nice because of
1500193323Sed  // its three-address nature. Tweak the cost function again when we can run
1501193323Sed  // convertToThreeAddress() at register allocation time.
1502193323Sed  if (AM.hasSymbolicDisplacement()) {
1503193323Sed    // For X86-64, we should always use lea to materialize RIP relative
1504193323Sed    // addresses.
1505193323Sed    if (Subtarget->is64Bit())
1506193323Sed      Complexity = 4;
1507193323Sed    else
1508193323Sed      Complexity += 2;
1509193323Sed  }
1510193323Sed
1511207618Srdivacky  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1512193323Sed    Complexity++;
1513193323Sed
1514198090Srdivacky  // If it isn't worth using an LEA, reject it.
1515198090Srdivacky  if (Complexity <= 2)
1516198090Srdivacky    return false;
1517239462Sdim
1518198090Srdivacky  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1519198090Srdivacky  return true;
1520193323Sed}
1521193323Sed
1522194612Sed/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1523218893Sdimbool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
1524194612Sed                                        SDValue &Scale, SDValue &Index,
1525210299Sed                                        SDValue &Disp, SDValue &Segment) {
1526194612Sed  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1527194612Sed  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1528239462Sdim
1529194612Sed  X86ISelAddressMode AM;
1530194612Sed  AM.GV = GA->getGlobal();
1531194612Sed  AM.Disp += GA->getOffset();
1532207618Srdivacky  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1533195098Sed  AM.SymbolFlags = GA->getTargetFlags();
1534195098Sed
1535194612Sed  if (N.getValueType() == MVT::i32) {
1536194612Sed    AM.Scale = 1;
1537194612Sed    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1538194612Sed  } else {
1539194612Sed    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1540194612Sed  }
1541239462Sdim
1542194612Sed  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1543194612Sed  return true;
1544194612Sed}
1545194612Sed
1546194612Sed
1547202375Srdivackybool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
1548193323Sed                                  SDValue &Base, SDValue &Scale,
1549193323Sed                                  SDValue &Index, SDValue &Disp,
1550193323Sed                                  SDValue &Segment) {
1551204642Srdivacky  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1552204642Srdivacky      !IsProfitableToFold(N, P, P) ||
1553207618Srdivacky      !IsLegalToFold(N, P, P, OptLevel))
1554204642Srdivacky    return false;
1555239462Sdim
1556218893Sdim  return SelectAddr(N.getNode(),
1557218893Sdim                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
1558193323Sed}
1559193323Sed
1560193323Sed/// getGlobalBaseReg - Return an SDNode that returns the value of
1561193323Sed/// the global base register. Output instructions required to
1562193323Sed/// initialize the global base register, if necessary.
1563193323Sed///
1564193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1565193399Sed  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1566261991Sdim  return CurDAG->getRegister(GlobalBaseReg,
1567261991Sdim                             getTargetLowering()->getPointerTy()).getNode();
1568193323Sed}
1569193323Sed
/// SelectAtomic64 - Select a 64-bit atomic node by matching its address
/// operand and emitting the machine node Opc with the original memory
/// operand attached.  Returns null if the address cannot be matched.
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  // Operands: chain, address, and (per the In2L/In2H naming) the low and
  // high halves of the value operand.
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = Node->getOperand(2);
  SDValue In2H = Node->getOperand(3);

  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return nullptr;
  // Carry the original memory operand over to the new machine node so its
  // memory reference info is preserved.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
  // Result types: two i32 values plus the chain.
  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
                                           MVT::i32, MVT::i32, MVT::Other, Ops);
  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
  return ResNode;
}
1587193323Sed
/// Atomic opcode table
///
// Row index into AtomicOpcTbl: the arithmetic operation performed by the
// atomic load-op node being selected.
enum AtomicOpc {
  ADD,  // atomic-load-add
  SUB,  // atomic-load-sub (also produced from ADD of a negative constant)
  INC,  // produced from ADD of +1
  DEC,  // produced from ADD of -1
  OR,
  AND,
  XOR,
  AtomicOpcEnd  // number of rows
};
1600223017Sdim
// Column index into AtomicOpcTbl: operand width combined with the kind of
// value operand (register, constant, or constant that sign-extends from 8
// bits; ConstantI64 is a constant that sign-extends from 32 bits).
enum AtomicSz {
  ConstantI8,       // i8, constant operand
  I8,               // i8, register operand
  SextConstantI16,  // i16, constant fitting a sign-extended imm8
  ConstantI16,      // i16, full-width constant
  I16,              // i16, register operand
  SextConstantI32,  // i32, constant fitting a sign-extended imm8
  ConstantI32,      // i32, full-width constant
  I32,              // i32, register operand
  SextConstantI64,  // i64, constant fitting a sign-extended imm8
  ConstantI64,      // i64, constant fitting a sign-extended imm32
  I64,              // i64, register operand
  AtomicSzEnd       // number of columns
};
1615223017Sdim
// AtomicOpcTbl[op][size] is the LOCK-prefixed machine opcode implementing
// the AtomicOpc row `op` at the AtomicSz column `size`.  A zero entry means
// no such encoding exists: INC/DEC take no value operand, so only their
// register-operand (I8/I16/I32/I64) columns are populated.
static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
  { // ADD
    X86::LOCK_ADD8mi,
    X86::LOCK_ADD8mr,
    X86::LOCK_ADD16mi8,
    X86::LOCK_ADD16mi,
    X86::LOCK_ADD16mr,
    X86::LOCK_ADD32mi8,
    X86::LOCK_ADD32mi,
    X86::LOCK_ADD32mr,
    X86::LOCK_ADD64mi8,
    X86::LOCK_ADD64mi32,
    X86::LOCK_ADD64mr,
  },
  { // SUB
    X86::LOCK_SUB8mi,
    X86::LOCK_SUB8mr,
    X86::LOCK_SUB16mi8,
    X86::LOCK_SUB16mi,
    X86::LOCK_SUB16mr,
    X86::LOCK_SUB32mi8,
    X86::LOCK_SUB32mi,
    X86::LOCK_SUB32mr,
    X86::LOCK_SUB64mi8,
    X86::LOCK_SUB64mi32,
    X86::LOCK_SUB64mr,
  },
  { // INC (unary: register-operand columns only)
    0,
    X86::LOCK_INC8m,
    0,
    0,
    X86::LOCK_INC16m,
    0,
    0,
    X86::LOCK_INC32m,
    0,
    0,
    X86::LOCK_INC64m,
  },
  { // DEC (unary: register-operand columns only)
    0,
    X86::LOCK_DEC8m,
    0,
    0,
    X86::LOCK_DEC16m,
    0,
    0,
    X86::LOCK_DEC32m,
    0,
    0,
    X86::LOCK_DEC64m,
  },
  { // OR
    X86::LOCK_OR8mi,
    X86::LOCK_OR8mr,
    X86::LOCK_OR16mi8,
    X86::LOCK_OR16mi,
    X86::LOCK_OR16mr,
    X86::LOCK_OR32mi8,
    X86::LOCK_OR32mi,
    X86::LOCK_OR32mr,
    X86::LOCK_OR64mi8,
    X86::LOCK_OR64mi32,
    X86::LOCK_OR64mr,
  },
  { // AND
    X86::LOCK_AND8mi,
    X86::LOCK_AND8mr,
    X86::LOCK_AND16mi8,
    X86::LOCK_AND16mi,
    X86::LOCK_AND16mr,
    X86::LOCK_AND32mi8,
    X86::LOCK_AND32mi,
    X86::LOCK_AND32mr,
    X86::LOCK_AND64mi8,
    X86::LOCK_AND64mi32,
    X86::LOCK_AND64mr,
  },
  { // XOR
    X86::LOCK_XOR8mi,
    X86::LOCK_XOR8mr,
    X86::LOCK_XOR16mi8,
    X86::LOCK_XOR16mi,
    X86::LOCK_XOR16mr,
    X86::LOCK_XOR32mi8,
    X86::LOCK_XOR32mi,
    X86::LOCK_XOR32mr,
    X86::LOCK_XOR64mi8,
    X86::LOCK_XOR64mi32,
    X86::LOCK_XOR64mr,
  }
};
1709223017Sdim
// Return the target constant operand for atomic-load-op and do simple
// translations, such as from atomic-load-add to lock-sub. The return value is
// one of the following 3 cases:
// + target-constant, the operand could be supported as a target constant.
// + empty, the operand is not needed any more with the new op selected.
// + non-empty, otherwise.
//
// Note: Op is an in-out parameter; the translations above rewrite it
// (ADD -> INC/DEC/SUB) as a side effect.
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
                                                SDLoc dl,
                                                enum AtomicOpc &Op, MVT NVT,
                                                SDValue Val) {
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
    int64_t CNVal = CN->getSExtValue();
    // Quit if not 32-bit imm.
    if ((int32_t)CNVal != CNVal)
      return Val;
    // For atomic-load-add, we could do some optimizations.
    if (Op == ADD) {
      // Translate to INC/DEC if ADD by 1 or -1.
      if ((CNVal == 1) || (CNVal == -1)) {
        Op = (CNVal == 1) ? INC : DEC;
        // No more constant operand after being translated into INC/DEC.
        return SDValue();
      }
      // Translate to SUB if ADD by negative value.
      if (CNVal < 0) {
        Op = SUB;
        CNVal = -CNVal;
        // NOTE(review): when CNVal == INT32_MIN, -CNVal (2^31) no longer
        // fits a signed 32-bit immediate -- verify the i64 opcode selection
        // in the caller handles that case.
      }
    }
    return CurDAG->getTargetConstant(CNVal, NVT);
  }

  // If the value operand is single-used, try to optimize it.
  if (Op == ADD && Val.hasOneUse()) {
    // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
    if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
      Op = SUB;
      return Val.getOperand(1);
    }
    // A special case for i16, which needs truncating as, in most cases, it's
    // promoted to i32. We will translate
    // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
    if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
        Val.getOperand(0).getOpcode() == ISD::SUB &&
        X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
      Op = SUB;
      Val = Val.getOperand(0);
      return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
                                            Val.getOperand(1));
    }
  }

  // No translation applied; hand the operand back unchanged.
  return Val;
}
1764243830Sdim
/// SelectAtomicLoadArith - Select an atomic load-op node whose result is
/// unused into a LOCK-prefixed memory arithmetic instruction, choosing the
/// opcode from AtomicOpcTbl.  Returns null if the transform does not apply.
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
  // Only applicable when the loaded value (result 0) is never used.
  if (Node->hasAnyUseOfValue(0))
    return nullptr;

  SDLoc dl(Node);

  // Optimize common patterns for __sync_or_and_fetch and similar arith
  // operations where the result is not used. This allows us to use the "lock"
  // version of the arithmetic instruction.
  SDValue Chain = Node->getOperand(0);
  SDValue Ptr = Node->getOperand(1);
  SDValue Val = Node->getOperand(2);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return nullptr;

  // Which index into the table.
  enum AtomicOpc Op;
  switch (Node->getOpcode()) {
    default:
      return nullptr;
    case ISD::ATOMIC_LOAD_OR:
      Op = OR;
      break;
    case ISD::ATOMIC_LOAD_AND:
      Op = AND;
      break;
    case ISD::ATOMIC_LOAD_XOR:
      Op = XOR;
      break;
    case ISD::ATOMIC_LOAD_ADD:
      Op = ADD;
      break;
  }

  // May rewrite Op (ADD -> INC/DEC/SUB) and/or replace Val; an empty Val
  // means the selected op takes no value operand (unary INC/DEC).
  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
  bool isUnOp = !Val.getNode();
  bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);

  // Pick the table column from the operand width and constant-ness.
  unsigned Opc = 0;
  switch (NVT.SimpleTy) {
    default: return nullptr;
    case MVT::i8:
      if (isCN)
        Opc = AtomicOpcTbl[Op][ConstantI8];
      else
        Opc = AtomicOpcTbl[Op][I8];
      break;
    case MVT::i16:
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = AtomicOpcTbl[Op][SextConstantI16];
        else
          Opc = AtomicOpcTbl[Op][ConstantI16];
      } else
        Opc = AtomicOpcTbl[Op][I16];
      break;
    case MVT::i32:
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = AtomicOpcTbl[Op][SextConstantI32];
        else
          Opc = AtomicOpcTbl[Op][ConstantI32];
      } else
        Opc = AtomicOpcTbl[Op][I32];
      break;
    case MVT::i64:
      // Start from the register form; refine to an immediate form only if
      // the constant fits (i64 has no full 64-bit immediate encoding here).
      Opc = AtomicOpcTbl[Op][I64];
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = AtomicOpcTbl[Op][SextConstantI64];
        else if (i64immSExt32(Val.getNode()))
          Opc = AtomicOpcTbl[Op][ConstantI64];
      }
      break;
  }

  assert(Opc != 0 && "Invalid arith lock transform!");

  // Build the machine node; result 0 is an IMPLICIT_DEF placeholder since
  // the original load-op value is unused (checked above).
  SDValue Ret;
  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                 dl, NVT), 0);
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  if (isUnOp) {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
  } else {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
  }
  cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
  SDValue RetVals[] = { Undef, Ret };
  return CurDAG->getMergeValues(RetVals, dl).getNode();
}
1860223017Sdim
/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = N->use_begin(),
         UE = N->use_end(); UI != UE; ++UI) {
    // Only examine CopyToReg uses.
    if (UI->getOpcode() != ISD::CopyToReg)
      return false;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
          X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the opcode of the user.
      switch (FlagUI->getMachineOpcode()) {
      // These comparisons don't treat the most significant bit specially.
      // The whitelist is the unsigned/equality/parity condition codes
      // (A/AE/B/BE/E/NE/P/NP) in SETcc, Jcc, and CMOVcc form.
      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
      case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
      case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
      case X86::CMOVA16rr: case X86::CMOVA16rm:
      case X86::CMOVA32rr: case X86::CMOVA32rm:
      case X86::CMOVA64rr: case X86::CMOVA64rm:
      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
      case X86::CMOVB16rr: case X86::CMOVB16rm:
      case X86::CMOVB32rr: case X86::CMOVB32rm:
      case X86::CMOVB64rr: case X86::CMOVB64rm:
      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
      case X86::CMOVE16rr: case X86::CMOVE16rm:
      case X86::CMOVE32rr: case X86::CMOVE32rm:
      case X86::CMOVE64rr: case X86::CMOVE64rm:
      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
      case X86::CMOVP16rr: case X86::CMOVP16rm:
      case X86::CMOVP32rr: case X86::CMOVP32rm:
      case X86::CMOVP64rr: case X86::CMOVP64rm:
        continue;
      // Anything else: assume conservatively.
      default: return false;
      }
    }
  }
  // All flag consumers are on the whitelist above.
  return true;
}
1922198090Srdivacky
1923234353Sdim/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
1924234353Sdim/// is suitable for doing the {load; increment or decrement; store} to modify
1925234353Sdim/// transformation.
1926239462Sdimstatic bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
1927234353Sdim                                SDValue StoredVal, SelectionDAG *CurDAG,
1928234353Sdim                                LoadSDNode* &LoadNode, SDValue &InputChain) {
1929234353Sdim
1930234353Sdim  // is the value stored the result of a DEC or INC?
1931234353Sdim  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
1932234353Sdim
1933234353Sdim  // is the stored value result 0 of the load?
1934234353Sdim  if (StoredVal.getResNo() != 0) return false;
1935234353Sdim
1936234353Sdim  // are there other uses of the loaded value than the inc or dec?
1937234353Sdim  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
1938234353Sdim
1939234353Sdim  // is the store non-extending and non-indexed?
1940234353Sdim  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
1941234353Sdim    return false;
1942234353Sdim
1943234353Sdim  SDValue Load = StoredVal->getOperand(0);
1944234353Sdim  // Is the stored value a non-extending and non-indexed load?
1945234353Sdim  if (!ISD::isNormalLoad(Load.getNode())) return false;
1946234353Sdim
1947234353Sdim  // Return LoadNode by reference.
1948234353Sdim  LoadNode = cast<LoadSDNode>(Load);
1949234353Sdim  // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
1950239462Sdim  EVT LdVT = LoadNode->getMemoryVT();
1951239462Sdim  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
1952234353Sdim      LdVT != MVT::i8)
1953234353Sdim    return false;
1954234353Sdim
1955234353Sdim  // Is store the only read of the loaded value?
1956234353Sdim  if (!Load.hasOneUse())
1957234353Sdim    return false;
1958239462Sdim
1959234353Sdim  // Is the address of the store the same as the load?
1960234353Sdim  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
1961234353Sdim      LoadNode->getOffset() != StoreNode->getOffset())
1962234353Sdim    return false;
1963234353Sdim
1964234353Sdim  // Check if the chain is produced by the load or is a TokenFactor with
1965234353Sdim  // the load output chain as an operand. Return InputChain by reference.
1966234353Sdim  SDValue Chain = StoreNode->getChain();
1967234353Sdim
1968234353Sdim  bool ChainCheck = false;
1969234353Sdim  if (Chain == Load.getValue(1)) {
1970234353Sdim    ChainCheck = true;
1971234353Sdim    InputChain = LoadNode->getChain();
1972234353Sdim  } else if (Chain.getOpcode() == ISD::TokenFactor) {
1973234353Sdim    SmallVector<SDValue, 4> ChainOps;
1974234353Sdim    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
1975234353Sdim      SDValue Op = Chain.getOperand(i);
1976234353Sdim      if (Op == Load.getValue(1)) {
1977234353Sdim        ChainCheck = true;
1978234353Sdim        continue;
1979234353Sdim      }
1980239462Sdim
1981239462Sdim      // Make sure using Op as part of the chain would not cause a cycle here.
1982239462Sdim      // In theory, we could check whether the chain node is a predecessor of
1983239462Sdim      // the load. But that can be very expensive. Instead visit the uses and
1984239462Sdim      // make sure they all have smaller node id than the load.
1985239462Sdim      int LoadId = LoadNode->getNodeId();
1986239462Sdim      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
1987239462Sdim             UE = UI->use_end(); UI != UE; ++UI) {
1988239462Sdim        if (UI.getUse().getResNo() != 0)
1989239462Sdim          continue;
1990239462Sdim        if (UI->getNodeId() > LoadId)
1991239462Sdim          return false;
1992239462Sdim      }
1993239462Sdim
1994234353Sdim      ChainOps.push_back(Op);
1995234353Sdim    }
1996234353Sdim
1997234353Sdim    if (ChainCheck)
1998234353Sdim      // Make a new TokenFactor with all the other input chains except
1999234353Sdim      // for the load.
2000261991Sdim      InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
2001276479Sdim                                   MVT::Other, ChainOps);
2002234353Sdim  }
2003234353Sdim  if (!ChainCheck)
2004234353Sdim    return false;
2005234353Sdim
2006234353Sdim  return true;
2007234353Sdim}
2008234353Sdim
2009234353Sdim/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
2010234353Sdim/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
2011234353Sdimstatic unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
2012234353Sdim  if (Opc == X86ISD::DEC) {
2013234353Sdim    if (LdVT == MVT::i64) return X86::DEC64m;
2014234353Sdim    if (LdVT == MVT::i32) return X86::DEC32m;
2015234353Sdim    if (LdVT == MVT::i16) return X86::DEC16m;
2016234353Sdim    if (LdVT == MVT::i8)  return X86::DEC8m;
2017234353Sdim  } else {
2018234353Sdim    assert(Opc == X86ISD::INC && "unrecognized opcode");
2019234353Sdim    if (LdVT == MVT::i64) return X86::INC64m;
2020234353Sdim    if (LdVT == MVT::i32) return X86::INC32m;
2021234353Sdim    if (LdVT == MVT::i16) return X86::INC16m;
2022234353Sdim    if (LdVT == MVT::i8)  return X86::INC8m;
2023234353Sdim  }
2024234353Sdim  llvm_unreachable("unrecognized size for LdVT");
2025234353Sdim}
2026234353Sdim
2027239462Sdim/// SelectGather - Customized ISel for GATHER operations.
2028239462Sdim///
2029239462SdimSDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
2030239462Sdim  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
2031239462Sdim  SDValue Chain = Node->getOperand(0);
2032239462Sdim  SDValue VSrc = Node->getOperand(2);
2033239462Sdim  SDValue Base = Node->getOperand(3);
2034239462Sdim  SDValue VIdx = Node->getOperand(4);
2035239462Sdim  SDValue VMask = Node->getOperand(5);
2036239462Sdim  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
2037239462Sdim  if (!Scale)
2038276479Sdim    return nullptr;
2039239462Sdim
2040239462Sdim  SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
2041239462Sdim                                   MVT::Other);
2042239462Sdim
2043239462Sdim  // Memory Operands: Base, Scale, Index, Disp, Segment
2044239462Sdim  SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
2045239462Sdim  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
2046239462Sdim  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
2047239462Sdim                          Disp, Segment, VMask, Chain};
2048261991Sdim  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops);
2049239462Sdim  // Node has 2 outputs: VDst and MVT::Other.
2050239462Sdim  // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
2051239462Sdim  // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
2052239462Sdim  // of ResNode.
2053239462Sdim  ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
2054239462Sdim  ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
2055239462Sdim  return ResNode;
2056239462Sdim}
2057239462Sdim
2058202375SrdivackySDNode *X86DAGToDAGISel::Select(SDNode *Node) {
2059261991Sdim  MVT NVT = Node->getSimpleValueType(0);
2060193323Sed  unsigned Opc, MOpc;
2061193323Sed  unsigned Opcode = Node->getOpcode();
2062261991Sdim  SDLoc dl(Node);
2063239462Sdim
2064204642Srdivacky  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
2065193323Sed
2066193323Sed  if (Node->isMachineOpcode()) {
2067204642Srdivacky    DEBUG(dbgs() << "== ";  Node->dump(CurDAG); dbgs() << '\n');
2068255804Sdim    Node->setNodeId(-1);
2069276479Sdim    return nullptr;   // Already selected.
2070193323Sed  }
2071193323Sed
2072193323Sed  switch (Opcode) {
2073198090Srdivacky  default: break;
2074239462Sdim  case ISD::INTRINSIC_W_CHAIN: {
2075239462Sdim    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2076239462Sdim    switch (IntNo) {
2077239462Sdim    default: break;
2078239462Sdim    case Intrinsic::x86_avx2_gather_d_pd:
2079239462Sdim    case Intrinsic::x86_avx2_gather_d_pd_256:
2080239462Sdim    case Intrinsic::x86_avx2_gather_q_pd:
2081239462Sdim    case Intrinsic::x86_avx2_gather_q_pd_256:
2082239462Sdim    case Intrinsic::x86_avx2_gather_d_ps:
2083239462Sdim    case Intrinsic::x86_avx2_gather_d_ps_256:
2084239462Sdim    case Intrinsic::x86_avx2_gather_q_ps:
2085239462Sdim    case Intrinsic::x86_avx2_gather_q_ps_256:
2086239462Sdim    case Intrinsic::x86_avx2_gather_d_q:
2087239462Sdim    case Intrinsic::x86_avx2_gather_d_q_256:
2088239462Sdim    case Intrinsic::x86_avx2_gather_q_q:
2089239462Sdim    case Intrinsic::x86_avx2_gather_q_q_256:
2090239462Sdim    case Intrinsic::x86_avx2_gather_d_d:
2091239462Sdim    case Intrinsic::x86_avx2_gather_d_d_256:
2092239462Sdim    case Intrinsic::x86_avx2_gather_q_d:
2093239462Sdim    case Intrinsic::x86_avx2_gather_q_d_256: {
2094261991Sdim      if (!Subtarget->hasAVX2())
2095261991Sdim        break;
2096239462Sdim      unsigned Opc;
2097239462Sdim      switch (IntNo) {
2098239462Sdim      default: llvm_unreachable("Impossible intrinsic");
2099239462Sdim      case Intrinsic::x86_avx2_gather_d_pd:     Opc = X86::VGATHERDPDrm;  break;
2100239462Sdim      case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
2101239462Sdim      case Intrinsic::x86_avx2_gather_q_pd:     Opc = X86::VGATHERQPDrm;  break;
2102239462Sdim      case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
2103239462Sdim      case Intrinsic::x86_avx2_gather_d_ps:     Opc = X86::VGATHERDPSrm;  break;
2104239462Sdim      case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
2105239462Sdim      case Intrinsic::x86_avx2_gather_q_ps:     Opc = X86::VGATHERQPSrm;  break;
2106239462Sdim      case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
2107239462Sdim      case Intrinsic::x86_avx2_gather_d_q:      Opc = X86::VPGATHERDQrm;  break;
2108239462Sdim      case Intrinsic::x86_avx2_gather_d_q_256:  Opc = X86::VPGATHERDQYrm; break;
2109239462Sdim      case Intrinsic::x86_avx2_gather_q_q:      Opc = X86::VPGATHERQQrm;  break;
2110239462Sdim      case Intrinsic::x86_avx2_gather_q_q_256:  Opc = X86::VPGATHERQQYrm; break;
2111239462Sdim      case Intrinsic::x86_avx2_gather_d_d:      Opc = X86::VPGATHERDDrm;  break;
2112239462Sdim      case Intrinsic::x86_avx2_gather_d_d_256:  Opc = X86::VPGATHERDDYrm; break;
2113239462Sdim      case Intrinsic::x86_avx2_gather_q_d:      Opc = X86::VPGATHERQDrm;  break;
2114239462Sdim      case Intrinsic::x86_avx2_gather_q_d_256:  Opc = X86::VPGATHERQDYrm; break;
2115239462Sdim      }
2116239462Sdim      SDNode *RetVal = SelectGather(Node, Opc);
2117239462Sdim      if (RetVal)
2118239462Sdim        // We already called ReplaceUses inside SelectGather.
2119276479Sdim        return nullptr;
2120239462Sdim      break;
2121239462Sdim    }
2122239462Sdim    }
2123239462Sdim    break;
2124239462Sdim  }
2125198090Srdivacky  case X86ISD::GlobalBaseReg:
2126198090Srdivacky    return getGlobalBaseReg();
2127193323Sed
2128239462Sdim
2129223017Sdim  case ISD::ATOMIC_LOAD_XOR:
2130223017Sdim  case ISD::ATOMIC_LOAD_AND:
2131243830Sdim  case ISD::ATOMIC_LOAD_OR:
2132243830Sdim  case ISD::ATOMIC_LOAD_ADD: {
2133223017Sdim    SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
2134223017Sdim    if (RetVal)
2135223017Sdim      return RetVal;
2136223017Sdim    break;
2137223017Sdim  }
2138221345Sdim  case ISD::AND:
2139221345Sdim  case ISD::OR:
2140221345Sdim  case ISD::XOR: {
2141221345Sdim    // For operations of the form (x << C1) op C2, check if we can use a smaller
2142221345Sdim    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2143221345Sdim    SDValue N0 = Node->getOperand(0);
2144221345Sdim    SDValue N1 = Node->getOperand(1);
2145221345Sdim
2146221345Sdim    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2147221345Sdim      break;
2148221345Sdim
2149221345Sdim    // i8 is unshrinkable, i16 should be promoted to i32.
2150221345Sdim    if (NVT != MVT::i32 && NVT != MVT::i64)
2151221345Sdim      break;
2152221345Sdim
2153221345Sdim    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2154221345Sdim    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2155221345Sdim    if (!Cst || !ShlCst)
2156221345Sdim      break;
2157221345Sdim
2158221345Sdim    int64_t Val = Cst->getSExtValue();
2159221345Sdim    uint64_t ShlVal = ShlCst->getZExtValue();
2160221345Sdim
2161221345Sdim    // Make sure that we don't change the operation by removing bits.
2162221345Sdim    // This only matters for OR and XOR, AND is unaffected.
2163243830Sdim    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2164243830Sdim    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2165221345Sdim      break;
2166221345Sdim
2167239462Sdim    unsigned ShlOp, Op;
2168261991Sdim    MVT CstVT = NVT;
2169221345Sdim
2170221345Sdim    // Check the minimum bitwidth for the new constant.
2171221345Sdim    // TODO: AND32ri is the same as AND64ri32 with zext imm.
2172221345Sdim    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2173221345Sdim    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2174221345Sdim    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2175221345Sdim      CstVT = MVT::i8;
2176221345Sdim    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2177221345Sdim      CstVT = MVT::i32;
2178221345Sdim
2179221345Sdim    // Bail if there is no smaller encoding.
2180221345Sdim    if (NVT == CstVT)
2181221345Sdim      break;
2182221345Sdim
2183261991Sdim    switch (NVT.SimpleTy) {
2184221345Sdim    default: llvm_unreachable("Unsupported VT!");
2185221345Sdim    case MVT::i32:
2186221345Sdim      assert(CstVT == MVT::i8);
2187221345Sdim      ShlOp = X86::SHL32ri;
2188221345Sdim
2189221345Sdim      switch (Opcode) {
2190239462Sdim      default: llvm_unreachable("Impossible opcode");
2191221345Sdim      case ISD::AND: Op = X86::AND32ri8; break;
2192221345Sdim      case ISD::OR:  Op =  X86::OR32ri8; break;
2193221345Sdim      case ISD::XOR: Op = X86::XOR32ri8; break;
2194221345Sdim      }
2195221345Sdim      break;
2196221345Sdim    case MVT::i64:
2197221345Sdim      assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2198221345Sdim      ShlOp = X86::SHL64ri;
2199221345Sdim
2200221345Sdim      switch (Opcode) {
2201239462Sdim      default: llvm_unreachable("Impossible opcode");
2202221345Sdim      case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2203221345Sdim      case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
2204221345Sdim      case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2205221345Sdim      }
2206221345Sdim      break;
2207221345Sdim    }
2208221345Sdim
2209221345Sdim    // Emit the smaller op and the shift.
2210221345Sdim    SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
2211221345Sdim    SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2212221345Sdim    return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2213221345Sdim                                getI8Imm(ShlVal));
2214221345Sdim  }
2215218893Sdim  case X86ISD::UMUL: {
2216218893Sdim    SDValue N0 = Node->getOperand(0);
2217218893Sdim    SDValue N1 = Node->getOperand(1);
2218239462Sdim
2219218893Sdim    unsigned LoReg;
2220261991Sdim    switch (NVT.SimpleTy) {
2221218893Sdim    default: llvm_unreachable("Unsupported VT!");
2222218893Sdim    case MVT::i8:  LoReg = X86::AL;  Opc = X86::MUL8r; break;
2223218893Sdim    case MVT::i16: LoReg = X86::AX;  Opc = X86::MUL16r; break;
2224218893Sdim    case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2225218893Sdim    case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2226218893Sdim    }
2227239462Sdim
2228218893Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2229218893Sdim                                          N0, SDValue()).getValue(1);
2230239462Sdim
2231218893Sdim    SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2232218893Sdim    SDValue Ops[] = {N1, InFlag};
2233251662Sdim    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2234239462Sdim
2235218893Sdim    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
2236218893Sdim    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
2237218893Sdim    ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
2238276479Sdim    return nullptr;
2239218893Sdim  }
2240239462Sdim
2241198090Srdivacky  case ISD::SMUL_LOHI:
2242198090Srdivacky  case ISD::UMUL_LOHI: {
2243198090Srdivacky    SDValue N0 = Node->getOperand(0);
2244198090Srdivacky    SDValue N1 = Node->getOperand(1);
2245193323Sed
2246198090Srdivacky    bool isSigned = Opcode == ISD::SMUL_LOHI;
2247243830Sdim    bool hasBMI2 = Subtarget->hasBMI2();
2248198090Srdivacky    if (!isSigned) {
2249261991Sdim      switch (NVT.SimpleTy) {
2250198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2251198090Srdivacky      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
2252198090Srdivacky      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2253243830Sdim      case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2254243830Sdim                     MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2255243830Sdim      case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2256243830Sdim                     MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2257193323Sed      }
2258198090Srdivacky    } else {
2259261991Sdim      switch (NVT.SimpleTy) {
2260198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2261198090Srdivacky      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
2262198090Srdivacky      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2263198090Srdivacky      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2264198090Srdivacky      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2265193323Sed      }
2266198090Srdivacky    }
2267193323Sed
2268243830Sdim    unsigned SrcReg, LoReg, HiReg;
2269243830Sdim    switch (Opc) {
2270243830Sdim    default: llvm_unreachable("Unknown MUL opcode!");
2271243830Sdim    case X86::IMUL8r:
2272243830Sdim    case X86::MUL8r:
2273243830Sdim      SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2274243830Sdim      break;
2275243830Sdim    case X86::IMUL16r:
2276243830Sdim    case X86::MUL16r:
2277243830Sdim      SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2278243830Sdim      break;
2279243830Sdim    case X86::IMUL32r:
2280243830Sdim    case X86::MUL32r:
2281243830Sdim      SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2282243830Sdim      break;
2283243830Sdim    case X86::IMUL64r:
2284243830Sdim    case X86::MUL64r:
2285243830Sdim      SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2286243830Sdim      break;
2287243830Sdim    case X86::MULX32rr:
2288243830Sdim      SrcReg = X86::EDX; LoReg = HiReg = 0;
2289243830Sdim      break;
2290243830Sdim    case X86::MULX64rr:
2291243830Sdim      SrcReg = X86::RDX; LoReg = HiReg = 0;
2292243830Sdim      break;
2293198090Srdivacky    }
2294193323Sed
2295198090Srdivacky    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2296202375Srdivacky    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2297198090Srdivacky    // Multiply is commmutative.
2298198090Srdivacky    if (!foldedLoad) {
2299202375Srdivacky      foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2300198090Srdivacky      if (foldedLoad)
2301198090Srdivacky        std::swap(N0, N1);
2302198090Srdivacky    }
2303193323Sed
2304243830Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2305239462Sdim                                          N0, SDValue()).getValue(1);
2306243830Sdim    SDValue ResHi, ResLo;
2307198090Srdivacky
2308198090Srdivacky    if (foldedLoad) {
2309243830Sdim      SDValue Chain;
2310198090Srdivacky      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2311198090Srdivacky                        InFlag };
2312243830Sdim      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2313243830Sdim        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2314251662Sdim        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2315243830Sdim        ResHi = SDValue(CNode, 0);
2316243830Sdim        ResLo = SDValue(CNode, 1);
2317243830Sdim        Chain = SDValue(CNode, 2);
2318243830Sdim        InFlag = SDValue(CNode, 3);
2319243830Sdim      } else {
2320243830Sdim        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2321251662Sdim        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2322243830Sdim        Chain = SDValue(CNode, 0);
2323243830Sdim        InFlag = SDValue(CNode, 1);
2324243830Sdim      }
2325218893Sdim
2326198090Srdivacky      // Update the chain.
2327243830Sdim      ReplaceUses(N1.getValue(1), Chain);
2328198090Srdivacky    } else {
2329243830Sdim      SDValue Ops[] = { N1, InFlag };
2330243830Sdim      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2331243830Sdim        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2332251662Sdim        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2333243830Sdim        ResHi = SDValue(CNode, 0);
2334243830Sdim        ResLo = SDValue(CNode, 1);
2335243830Sdim        InFlag = SDValue(CNode, 2);
2336243830Sdim      } else {
2337243830Sdim        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2338251662Sdim        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2339243830Sdim        InFlag = SDValue(CNode, 0);
2340243830Sdim      }
2341198090Srdivacky    }
2342198090Srdivacky
2343210299Sed    // Prevent use of AH in a REX instruction by referencing AX instead.
2344210299Sed    if (HiReg == X86::AH && Subtarget->is64Bit() &&
2345210299Sed        !SDValue(Node, 1).use_empty()) {
2346210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2347210299Sed                                              X86::AX, MVT::i16, InFlag);
2348210299Sed      InFlag = Result.getValue(2);
2349210299Sed      // Get the low part if needed. Don't use getCopyFromReg for aliasing
2350210299Sed      // registers.
2351210299Sed      if (!SDValue(Node, 0).use_empty())
2352210299Sed        ReplaceUses(SDValue(Node, 1),
2353210299Sed          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2354210299Sed
2355210299Sed      // Shift AX down 8 bits.
2356210299Sed      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2357210299Sed                                              Result,
2358210299Sed                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
2359210299Sed      // Then truncate it down to i8.
2360210299Sed      ReplaceUses(SDValue(Node, 1),
2361210299Sed        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2362210299Sed    }
2363198090Srdivacky    // Copy the low half of the result, if it is needed.
2364202375Srdivacky    if (!SDValue(Node, 0).use_empty()) {
2365276479Sdim      if (!ResLo.getNode()) {
2366243830Sdim        assert(LoReg && "Register for low half is not defined!");
2367243830Sdim        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2368243830Sdim                                       InFlag);
2369243830Sdim        InFlag = ResLo.getValue(2);
2370243830Sdim      }
2371243830Sdim      ReplaceUses(SDValue(Node, 0), ResLo);
2372243830Sdim      DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2373198090Srdivacky    }
2374198090Srdivacky    // Copy the high half of the result, if it is needed.
2375202375Srdivacky    if (!SDValue(Node, 1).use_empty()) {
2376276479Sdim      if (!ResHi.getNode()) {
2377243830Sdim        assert(HiReg && "Register for high half is not defined!");
2378243830Sdim        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2379243830Sdim                                       InFlag);
2380243830Sdim        InFlag = ResHi.getValue(2);
2381243830Sdim      }
2382243830Sdim      ReplaceUses(SDValue(Node, 1), ResHi);
2383243830Sdim      DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2384198090Srdivacky    }
2385239462Sdim
2386276479Sdim    return nullptr;
2387198090Srdivacky  }
2388193323Sed
2389198090Srdivacky  case ISD::SDIVREM:
2390198090Srdivacky  case ISD::UDIVREM: {
2391198090Srdivacky    SDValue N0 = Node->getOperand(0);
2392198090Srdivacky    SDValue N1 = Node->getOperand(1);
2393193323Sed
2394198090Srdivacky    bool isSigned = Opcode == ISD::SDIVREM;
2395198090Srdivacky    if (!isSigned) {
2396261991Sdim      switch (NVT.SimpleTy) {
2397198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2398198090Srdivacky      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
2399198090Srdivacky      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2400198090Srdivacky      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2401198090Srdivacky      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2402193323Sed      }
2403198090Srdivacky    } else {
2404261991Sdim      switch (NVT.SimpleTy) {
2405198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2406198090Srdivacky      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
2407198090Srdivacky      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2408198090Srdivacky      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2409198090Srdivacky      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2410198090Srdivacky      }
2411198090Srdivacky    }
2412193323Sed
2413201360Srdivacky    unsigned LoReg, HiReg, ClrReg;
2414261991Sdim    unsigned SExtOpcode;
2415261991Sdim    switch (NVT.SimpleTy) {
2416198090Srdivacky    default: llvm_unreachable("Unsupported VT!");
2417198090Srdivacky    case MVT::i8:
2418201360Srdivacky      LoReg = X86::AL;  ClrReg = HiReg = X86::AH;
2419198090Srdivacky      SExtOpcode = X86::CBW;
2420198090Srdivacky      break;
2421198090Srdivacky    case MVT::i16:
2422198090Srdivacky      LoReg = X86::AX;  HiReg = X86::DX;
2423261991Sdim      ClrReg = X86::DX;
2424198090Srdivacky      SExtOpcode = X86::CWD;
2425198090Srdivacky      break;
2426198090Srdivacky    case MVT::i32:
2427201360Srdivacky      LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2428198090Srdivacky      SExtOpcode = X86::CDQ;
2429198090Srdivacky      break;
2430198090Srdivacky    case MVT::i64:
2431201360Srdivacky      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2432198090Srdivacky      SExtOpcode = X86::CQO;
2433198090Srdivacky      break;
2434198090Srdivacky    }
2435193323Sed
2436198090Srdivacky    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2437202375Srdivacky    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2438198090Srdivacky    bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2439198090Srdivacky
2440198090Srdivacky    SDValue InFlag;
2441198090Srdivacky    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2442198090Srdivacky      // Special case for div8, just use a move with zero extension to AX to
2443198090Srdivacky      // clear the upper 8 bits (AH).
2444198090Srdivacky      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2445202375Srdivacky      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2446198090Srdivacky        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2447198090Srdivacky        Move =
2448223017Sdim          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2449251662Sdim                                         MVT::Other, Ops), 0);
2450198090Srdivacky        Chain = Move.getValue(1);
2451198090Srdivacky        ReplaceUses(N0.getValue(1), Chain);
2452193323Sed      } else {
2453198090Srdivacky        Move =
2454223017Sdim          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2455198090Srdivacky        Chain = CurDAG->getEntryNode();
2456198090Srdivacky      }
2457223017Sdim      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2458198090Srdivacky      InFlag = Chain.getValue(1);
2459198090Srdivacky    } else {
2460198090Srdivacky      InFlag =
2461198090Srdivacky        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2462198090Srdivacky                             LoReg, N0, SDValue()).getValue(1);
2463198090Srdivacky      if (isSigned && !signBitIsZero) {
2464198090Srdivacky        // Sign extend the low part into the high part.
2465193323Sed        InFlag =
2466218893Sdim          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2467198090Srdivacky      } else {
2468198090Srdivacky        // Zero out the high part, effectively zero extending the input.
2469261991Sdim        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2470261991Sdim        switch (NVT.SimpleTy) {
2471261991Sdim        case MVT::i16:
2472261991Sdim          ClrNode =
2473261991Sdim              SDValue(CurDAG->getMachineNode(
2474261991Sdim                          TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2475261991Sdim                          CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)),
2476261991Sdim                      0);
2477261991Sdim          break;
2478261991Sdim        case MVT::i32:
2479261991Sdim          break;
2480261991Sdim        case MVT::i64:
2481261991Sdim          ClrNode =
2482261991Sdim              SDValue(CurDAG->getMachineNode(
2483261991Sdim                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2484261991Sdim                          CurDAG->getTargetConstant(0, MVT::i64), ClrNode,
2485261991Sdim                          CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
2486261991Sdim                      0);
2487261991Sdim          break;
2488261991Sdim        default:
2489261991Sdim          llvm_unreachable("Unexpected division source");
2490261991Sdim        }
2491261991Sdim
2492201360Srdivacky        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2493198090Srdivacky                                      ClrNode, InFlag).getValue(1);
2494193323Sed      }
2495198090Srdivacky    }
2496193323Sed
2497198090Srdivacky    if (foldedLoad) {
2498198090Srdivacky      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2499198090Srdivacky                        InFlag };
2500198090Srdivacky      SDNode *CNode =
2501251662Sdim        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2502198090Srdivacky      InFlag = SDValue(CNode, 1);
2503198090Srdivacky      // Update the chain.
2504198090Srdivacky      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2505198090Srdivacky    } else {
2506198090Srdivacky      InFlag =
2507218893Sdim        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2508198090Srdivacky    }
2509198090Srdivacky
2510210299Sed    // Prevent use of AH in a REX instruction by referencing AX instead.
2511210299Sed    // Shift it down 8 bits.
2512261991Sdim    //
2513261991Sdim    // The current assumption of the register allocator is that isel
2514261991Sdim    // won't generate explicit references to the GPR8_NOREX registers. If
2515261991Sdim    // the allocator and/or the backend get enhanced to be more robust in
2516261991Sdim    // that regard, this can be, and should be, removed.
2517210299Sed    if (HiReg == X86::AH && Subtarget->is64Bit() &&
2518210299Sed        !SDValue(Node, 1).use_empty()) {
2519210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2520210299Sed                                              X86::AX, MVT::i16, InFlag);
2521210299Sed      InFlag = Result.getValue(2);
2522210299Sed
2523210299Sed      // If we also need AL (the quotient), get it by extracting a subreg from
2524210299Sed      // Result. The fast register allocator does not like multiple CopyFromReg
2525210299Sed      // nodes using aliasing registers.
2526210299Sed      if (!SDValue(Node, 0).use_empty())
2527210299Sed        ReplaceUses(SDValue(Node, 0),
2528210299Sed          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2529210299Sed
2530210299Sed      // Shift AX right by 8 bits instead of using AH.
2531210299Sed      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2532210299Sed                                         Result,
2533210299Sed                                         CurDAG->getTargetConstant(8, MVT::i8)),
2534210299Sed                       0);
2535210299Sed      ReplaceUses(SDValue(Node, 1),
2536210299Sed        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2537210299Sed    }
2538198090Srdivacky    // Copy the division (low) result, if it is needed.
2539202375Srdivacky    if (!SDValue(Node, 0).use_empty()) {
2540198090Srdivacky      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2541198090Srdivacky                                                LoReg, NVT, InFlag);
2542198090Srdivacky      InFlag = Result.getValue(2);
2543202375Srdivacky      ReplaceUses(SDValue(Node, 0), Result);
2544204642Srdivacky      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2545198090Srdivacky    }
2546198090Srdivacky    // Copy the remainder (high) result, if it is needed.
2547202375Srdivacky    if (!SDValue(Node, 1).use_empty()) {
2548210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2549210299Sed                                              HiReg, NVT, InFlag);
2550210299Sed      InFlag = Result.getValue(2);
2551202375Srdivacky      ReplaceUses(SDValue(Node, 1), Result);
2552204642Srdivacky      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2553198090Srdivacky    }
2554276479Sdim    return nullptr;
2555198090Srdivacky  }
2556193323Sed
2557239462Sdim  case X86ISD::CMP:
2558239462Sdim  case X86ISD::SUB: {
2559239462Sdim    // Sometimes a SUB is used to perform comparison.
2560239462Sdim    if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2561239462Sdim      // This node is not a CMP.
2562239462Sdim      break;
2563198090Srdivacky    SDValue N0 = Node->getOperand(0);
2564198090Srdivacky    SDValue N1 = Node->getOperand(1);
2565198090Srdivacky
2566198090Srdivacky    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2567198090Srdivacky    // use a smaller encoding.
2568212904Sdim    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2569212904Sdim        HasNoSignedComparisonUses(Node))
2570207618Srdivacky      // Look past the truncate if CMP is the only use of it.
2571207618Srdivacky      N0 = N0.getOperand(0);
2572234353Sdim    if ((N0.getNode()->getOpcode() == ISD::AND ||
2573234353Sdim         (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
2574234353Sdim        N0.getNode()->hasOneUse() &&
2575198090Srdivacky        N0.getValueType() != MVT::i8 &&
2576198090Srdivacky        X86::isZeroNode(N1)) {
2577198090Srdivacky      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
2578198090Srdivacky      if (!C) break;
2579198090Srdivacky
2580198090Srdivacky      // For example, convert "testl %eax, $8" to "testb %al, $8"
2581198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
2582198090Srdivacky          (!(C->getZExtValue() & 0x80) ||
2583198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2584198090Srdivacky        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
2585198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2586198090Srdivacky
2587198090Srdivacky        // On x86-32, only the ABCD registers have 8-bit subregisters.
2588198090Srdivacky        if (!Subtarget->is64Bit()) {
2589234353Sdim          const TargetRegisterClass *TRC;
2590261991Sdim          switch (N0.getSimpleValueType().SimpleTy) {
2591198090Srdivacky          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2592198090Srdivacky          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2593198090Srdivacky          default: llvm_unreachable("Unsupported TEST operand type!");
2594198090Srdivacky          }
2595198090Srdivacky          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
2596198090Srdivacky          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2597198090Srdivacky                                               Reg.getValueType(), Reg, RC), 0);
2598198090Srdivacky        }
2599198090Srdivacky
2600198090Srdivacky        // Extract the l-register.
2601208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2602198090Srdivacky                                                        MVT::i8, Reg);
2603198090Srdivacky
2604198090Srdivacky        // Emit a testb.
2605243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2606243830Sdim                                                 Subreg, Imm);
2607243830Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2608243830Sdim        // one, do not call ReplaceAllUsesWith.
2609243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2610243830Sdim                    SDValue(NewNode, 0));
2611276479Sdim        return nullptr;
2612193323Sed      }
2613198090Srdivacky
2614198090Srdivacky      // For example, "testl %eax, $2048" to "testb %ah, $8".
2615198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
2616198090Srdivacky          (!(C->getZExtValue() & 0x8000) ||
2617198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2618198090Srdivacky        // Shift the immediate right by 8 bits.
2619198090Srdivacky        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
2620198090Srdivacky                                                       MVT::i8);
2621198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2622198090Srdivacky
2623198090Srdivacky        // Put the value in an ABCD register.
2624234353Sdim        const TargetRegisterClass *TRC;
2625261991Sdim        switch (N0.getSimpleValueType().SimpleTy) {
2626198090Srdivacky        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
2627198090Srdivacky        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2628198090Srdivacky        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2629198090Srdivacky        default: llvm_unreachable("Unsupported TEST operand type!");
2630198090Srdivacky        }
2631198090Srdivacky        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
2632198090Srdivacky        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2633198090Srdivacky                                             Reg.getValueType(), Reg, RC), 0);
2634198090Srdivacky
2635198090Srdivacky        // Extract the h-register.
2636208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
2637198090Srdivacky                                                        MVT::i8, Reg);
2638198090Srdivacky
2639226633Sdim        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
2640226633Sdim        // target GR8_NOREX registers, so make sure the register class is
2641226633Sdim        // forced.
2642243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
2643243830Sdim                                                 MVT::i32, Subreg, ShiftedImm);
2644243830Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2645243830Sdim        // one, do not call ReplaceAllUsesWith.
2646243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2647243830Sdim                    SDValue(NewNode, 0));
2648276479Sdim        return nullptr;
2649193323Sed      }
2650198090Srdivacky
2651198090Srdivacky      // For example, "testl %eax, $32776" to "testw %ax, $32776".
2652198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
2653198090Srdivacky          N0.getValueType() != MVT::i16 &&
2654198090Srdivacky          (!(C->getZExtValue() & 0x8000) ||
2655198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2656198090Srdivacky        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
2657198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2658198090Srdivacky
2659198090Srdivacky        // Extract the 16-bit subregister.
2660208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
2661198090Srdivacky                                                        MVT::i16, Reg);
2662198090Srdivacky
2663198090Srdivacky        // Emit a testw.
2664243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
2665243830Sdim                                                 Subreg, Imm);
2666243830Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2667243830Sdim        // one, do not call ReplaceAllUsesWith.
2668243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2669243830Sdim                    SDValue(NewNode, 0));
2670276479Sdim        return nullptr;
2671193323Sed      }
2672198090Srdivacky
2673198090Srdivacky      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
2674198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
2675198090Srdivacky          N0.getValueType() == MVT::i64 &&
2676198090Srdivacky          (!(C->getZExtValue() & 0x80000000) ||
2677198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2678198090Srdivacky        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
2679198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2680198090Srdivacky
2681198090Srdivacky        // Extract the 32-bit subregister.
2682208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
2683198090Srdivacky                                                        MVT::i32, Reg);
2684198090Srdivacky
2685198090Srdivacky        // Emit a testl.
2686243830Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
2687243830Sdim                                                 Subreg, Imm);
2688243830Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2689243830Sdim        // one, do not call ReplaceAllUsesWith.
2690243830Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2691243830Sdim                    SDValue(NewNode, 0));
2692276479Sdim        return nullptr;
2693198090Srdivacky      }
2694193323Sed    }
2695198090Srdivacky    break;
2696193323Sed  }
2697234353Sdim  case ISD::STORE: {
2698234353Sdim    // Change a chain of {load; incr or dec; store} of the same value into
2699234353Sdim    // a simple increment or decrement through memory of that value, if the
2700234353Sdim    // uses of the modified value and its address are suitable.
2701234353Sdim    // The DEC64m tablegen pattern is currently not able to match the case where
2702239462Sdim    // the EFLAGS on the original DEC are used. (This also applies to
2703234353Sdim    // {INC,DEC}X{64,32,16,8}.)
2704234353Sdim    // We'll need to improve tablegen to allow flags to be transferred from a
2705234353Sdim    // node in the pattern to the result node.  probably with a new keyword
2706234353Sdim    // for example, we have this
2707234353Sdim    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2708234353Sdim    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2709234353Sdim    //   (implicit EFLAGS)]>;
2710234353Sdim    // but maybe need something like this
2711234353Sdim    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2712234353Sdim    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2713234353Sdim    //   (transferrable EFLAGS)]>;
2714234353Sdim
2715234353Sdim    StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2716234353Sdim    SDValue StoredVal = StoreNode->getOperand(1);
2717234353Sdim    unsigned Opc = StoredVal->getOpcode();
2718234353Sdim
2719276479Sdim    LoadSDNode *LoadNode = nullptr;
2720234353Sdim    SDValue InputChain;
2721234353Sdim    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
2722234353Sdim                             LoadNode, InputChain))
2723234353Sdim      break;
2724234353Sdim
2725234353Sdim    SDValue Base, Scale, Index, Disp, Segment;
2726234353Sdim    if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
2727234353Sdim                    Base, Scale, Index, Disp, Segment))
2728234353Sdim      break;
2729234353Sdim
2730234353Sdim    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2731234353Sdim    MemOp[0] = StoreNode->getMemOperand();
2732234353Sdim    MemOp[1] = LoadNode->getMemOperand();
2733234353Sdim    const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
2734239462Sdim    EVT LdVT = LoadNode->getMemoryVT();
2735234353Sdim    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
2736234353Sdim    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
2737261991Sdim                                                   SDLoc(Node),
2738251662Sdim                                                   MVT::i32, MVT::Other, Ops);
2739234353Sdim    Result->setMemRefs(MemOp, MemOp + 2);
2740234353Sdim
2741234353Sdim    ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2742234353Sdim    ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2743234353Sdim
2744234353Sdim    return Result;
2745198090Srdivacky  }
2746234353Sdim  }
2747193323Sed
2748202375Srdivacky  SDNode *ResNode = SelectCode(Node);
2749193323Sed
2750204642Srdivacky  DEBUG(dbgs() << "=> ";
2751276479Sdim        if (ResNode == nullptr || ResNode == Node)
2752204642Srdivacky          Node->dump(CurDAG);
2753204642Srdivacky        else
2754204642Srdivacky          ResNode->dump(CurDAG);
2755204642Srdivacky        dbgs() << '\n');
2756193323Sed
2757193323Sed  return ResNode;
2758193323Sed}
2759193323Sed
2760193323Sedbool X86DAGToDAGISel::
2761193323SedSelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
2762193323Sed                             std::vector<SDValue> &OutOps) {
2763193323Sed  SDValue Op0, Op1, Op2, Op3, Op4;
2764193323Sed  switch (ConstraintCode) {
2765193323Sed  case 'o':   // offsetable        ??
2766193323Sed  case 'v':   // not offsetable    ??
2767193323Sed  default: return true;
2768193323Sed  case 'm':   // memory
2769276479Sdim    if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
2770193323Sed      return true;
2771193323Sed    break;
2772193323Sed  }
2773239462Sdim
2774193323Sed  OutOps.push_back(Op0);
2775193323Sed  OutOps.push_back(Op1);
2776193323Sed  OutOps.push_back(Op2);
2777193323Sed  OutOps.push_back(Op3);
2778193323Sed  OutOps.push_back(Op4);
2779193323Sed  return false;
2780193323Sed}
2781193323Sed
2782239462Sdim/// createX86ISelDag - This pass converts a legalized DAG into a
2783193323Sed/// X86-specific DAG, ready for instruction scheduling.
2784193323Sed///
2785193323SedFunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
2786234353Sdim                                     CodeGenOpt::Level OptLevel) {
2787193323Sed  return new X86DAGToDAGISel(TM, OptLevel);
2788193323Sed}
2789