//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.  An instance accumulates the components of an x86 addressing
  /// mode (base + scale*index + disp + segment, plus at most one symbolic
  /// displacement) while MatchAddress* walks the DAG.
  struct X86ISelAddressMode {
    // Discriminates which member of the Base_Reg/Base_FrameIndex "union"
    // below is meaningful.
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;        // Valid when BaseType == RegBase.
    int Base_FrameIndex;     // Valid when BaseType == FrameIndexBase.

    unsigned Scale;          // Index scale factor; initialized to 1.
    SDValue IndexReg;        // Optional index register (null node if unused).
    int32_t Disp;            // Integer displacement (32-bit, even on x86-64).
    SDValue Segment;         // Optional segment register (null node if unused).
    // Symbolic displacement alternatives; hasSymbolicDisplacement() reports
    // whether any of them is set.
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    int JT;                  // Jump-table index; -1 means "none".
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    // Default-construct an empty mode: register base with no registers set,
    // scale 1, no displacement, no symbolic operand.
    X86ISelAddressMode()
      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
        SymbolFlags(X86II::MO_NO_FLAG) {
    }

    /// hasSymbolicDisplacement - True if any of the symbolic displacement
    /// fields (global, constant pool, external symbol, jump table, block
    /// address) has been filled in.
    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
    }

    /// hasBaseOrIndexReg - True if the mode already consumes a base (either
    /// a frame index or a base register) or an index register.
    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    /// setBaseReg - Switch the base to register form and install Reg as the
    /// base register.
    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Debug-only: print every component of the addressing mode to dbgs().
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode() != 0)
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode() != 0)
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}
138193323Sed
namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel : public SelectionDAGISel {
    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.  Recomputed per function in PreprocessISelDAG
    /// from the OptimizeForSize function attribute.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    virtual void EmitFunctionEntryCode();

    virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;

    virtual void PreprocessISelDAG();

    /// immSext8 - Predicate used by the generated matcher: true if the
    /// constant node fits in a sign-extended 8-bit immediate.
    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N);
    SDNode *SelectGather(SDNode *N, unsigned Opc);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);

    // Address-matching helpers.  NOTE(review): these appear to follow a
    // "return true on failure / false on success" convention (see
    // MatchLoadInAddress's definition below) -- confirm before relying on it.
    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    // ComplexPattern hooks invoked by the generated matcher to produce the
    // five operands of an x86 memory reference.
    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
    bool SelectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Lower a matched X86ISelAddressMode into the five
    /// concrete operands (Base, Scale, Index, Disp, Segment) that x86 memory
    /// operands consist of.  Exactly one of the symbolic-displacement fields
    /// is consulted, in the priority order GV, CP, ES, JT, BlockAddr, plain
    /// constant.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
                                    getTargetLowering()->getPointerTy()) :
        AM.Base_Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      // Register 0 stands for "no segment override".
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() const {
      return getTargetMachine().getInstrInfo();
    }
  };
}
295193323Sed
296193323Sed
297203954Srdivackybool
298203954SrdivackyX86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
299193323Sed  if (OptLevel == CodeGenOpt::None) return false;
300193323Sed
301203954Srdivacky  if (!N.hasOneUse())
302203954Srdivacky    return false;
303203954Srdivacky
304203954Srdivacky  if (N.getOpcode() != ISD::LOAD)
305203954Srdivacky    return true;
306203954Srdivacky
307203954Srdivacky  // If N is a load, do additional profitability checks.
308203954Srdivacky  if (U == Root) {
309193323Sed    switch (U->getOpcode()) {
310193323Sed    default: break;
311202375Srdivacky    case X86ISD::ADD:
312202375Srdivacky    case X86ISD::SUB:
313202375Srdivacky    case X86ISD::AND:
314202375Srdivacky    case X86ISD::XOR:
315202375Srdivacky    case X86ISD::OR:
316193323Sed    case ISD::ADD:
317193323Sed    case ISD::ADDC:
318193323Sed    case ISD::ADDE:
319193323Sed    case ISD::AND:
320193323Sed    case ISD::OR:
321193323Sed    case ISD::XOR: {
322193323Sed      SDValue Op1 = U->getOperand(1);
323193323Sed
324193323Sed      // If the other operand is a 8-bit immediate we should fold the immediate
325193323Sed      // instead. This reduces code size.
326193323Sed      // e.g.
327193323Sed      // movl 4(%esp), %eax
328193323Sed      // addl $4, %eax
329193323Sed      // vs.
330193323Sed      // movl $4, %eax
331193323Sed      // addl 4(%esp), %eax
332193323Sed      // The former is 2 bytes shorter. In case where the increment is 1, then
333193323Sed      // the saving can be 4 bytes (by using incl %eax).
334193323Sed      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
335193323Sed        if (Imm->getAPIntValue().isSignedIntN(8))
336193323Sed          return false;
337193323Sed
338193323Sed      // If the other operand is a TLS address, we should fold it instead.
339193323Sed      // This produces
340193323Sed      // movl    %gs:0, %eax
341193323Sed      // leal    i@NTPOFF(%eax), %eax
342193323Sed      // instead of
343193323Sed      // movl    $i@NTPOFF, %eax
344193323Sed      // addl    %gs:0, %eax
345193323Sed      // if the block also has an access to a second TLS address this will save
346193323Sed      // a load.
347193323Sed      // FIXME: This is probably also true for non TLS addresses.
348193323Sed      if (Op1.getOpcode() == X86ISD::Wrapper) {
349193323Sed        SDValue Val = Op1.getOperand(0);
350193323Sed        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
351193323Sed          return false;
352193323Sed      }
353193323Sed    }
354193323Sed    }
355203954Srdivacky  }
356193323Sed
357203954Srdivacky  return true;
358203954Srdivacky}
359203954Srdivacky
/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
/// After this runs, the load produces the chain consumed by the call, so the
/// load can be folded into the call's address operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  // Step 1: build the chain that OrigChain should use once Load is removed
  // from it.  Either the chain *is* the load (use the load's own chain
  // input), or it is a TokenFactor in which the load's entry is replaced by
  // the load's chain input.
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  // Step 2: splice the rebuilt chain in as OrigChain's operand 0, keeping
  // its remaining operands unchanged.
  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
    Ops.push_back(OrigChain.getOperand(i));
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
  // Step 3: re-chain the load so it hangs off the call's former chain input
  // (address and offset operands are preserved).
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  // Step 4: make the call consume the load's output chain, keeping the
  // call's remaining operands unchanged.
  unsigned NumOps = Call.getNode()->getNumOperands();
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = NumOps; i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
}
395193323Sed
396193323Sed/// isCalleeLoad - Return true if call address is a load and it can be
397193323Sed/// moved below CALLSEQ_START and the chains leading up to the call.
398193323Sed/// Return the CALLSEQ_START by reference as a second output.
399205218Srdivacky/// In the case of a tail call, there isn't a callseq node between the call
400205218Srdivacky/// chain and the load.
401205218Srdivackystatic bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
402245431Sdim  // The transformation is somewhat dangerous if the call's chain was glued to
403245431Sdim  // the call. After MoveBelowOrigChain the load is moved between the call and
404245431Sdim  // the chain, this can create a cycle if the load is not folded. So it is
405245431Sdim  // *really* important that we are sure the load will be folded.
406193323Sed  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
407193323Sed    return false;
408193323Sed  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
409193323Sed  if (!LD ||
410193323Sed      LD->isVolatile() ||
411193323Sed      LD->getAddressingMode() != ISD::UNINDEXED ||
412193323Sed      LD->getExtensionType() != ISD::NON_EXTLOAD)
413193323Sed    return false;
414193323Sed
415193323Sed  // Now let's find the callseq_start.
416205218Srdivacky  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
417193323Sed    if (!Chain.hasOneUse())
418193323Sed      return false;
419193323Sed    Chain = Chain.getOperand(0);
420193323Sed  }
421205218Srdivacky
422205218Srdivacky  if (!Chain.getNumOperands())
423205218Srdivacky    return false;
424252723Sdim  // Since we are not checking for AA here, conservatively abort if the chain
425252723Sdim  // writes to memory. It's not safe to move the callee (a load) across a store.
426252723Sdim  if (isa<MemSDNode>(Chain.getNode()) &&
427252723Sdim      cast<MemSDNode>(Chain.getNode())->writeMem())
428252723Sdim    return false;
429193323Sed  if (Chain.getOperand(0).getNode() == Callee.getNode())
430193323Sed    return true;
431193323Sed  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
432198090Srdivacky      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
433198090Srdivacky      Callee.getValue(1).hasOneUse())
434193323Sed    return true;
435193323Sed  return false;
436193323Sed}
437193323Sed
/// PreprocessISelDAG - Runs over the whole DAG before instruction selection.
/// Performs two rewrites: (1) moves callee-address loads below the call's
/// chain so they can be folded into CALL/TC_RETURN, and (2) lowers FP-stack
/// FP_ROUND/FP_EXTEND into an explicit store/load through a stack temporary.
void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->getAttributes().
    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register-indirect
        // calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      // isCalleeLoad also advances Chain up to the CALLSEQ_START (when one
      // exists) for use by MoveBelowOrigChain.
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack.  This is a gross hack.  We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass.  We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(getTargetLowering());
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}
554193323Sed
555193323Sed
556193323Sed/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
557193323Sed/// the main function.
558193323Sedvoid X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
559193323Sed                                             MachineFrameInfo *MFI) {
560193323Sed  const TargetInstrInfo *TII = TM.getInstrInfo();
561218893Sdim  if (Subtarget->isTargetCygMing()) {
562218893Sdim    unsigned CallOp =
563235633Sdim      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
564206124Srdivacky    BuildMI(BB, DebugLoc(),
565218893Sdim            TII->get(CallOp)).addExternalSymbol("__main");
566218893Sdim  }
567193323Sed}
568193323Sed
569207618Srdivackyvoid X86DAGToDAGISel::EmitFunctionEntryCode() {
570193323Sed  // If this is main, emit special code for main.
571207618Srdivacky  if (const Function *Fn = MF->getFunction())
572207618Srdivacky    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
573207618Srdivacky      EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
574193323Sed}
575193323Sed
/// isDispSafeForFrameIndex - On 64-bit platforms, a frame index can carry a
/// displacement that, once added to the explicit displacement, overflows the
/// 32-bit displacement field.  Assuming the frame index's own displacement
/// fits in a 31-bit signed integer (only slightly stronger than the existing
/// assumption that it fits in 32 bits), a 31-bit Val is always safe.
static bool isDispSafeForFrameIndex(int64_t Val) {
  // Equivalent to isInt<31>(Val): -2^30 <= Val < 2^30.
  const int64_t Bound = INT64_C(1) << 30;
  return Val >= -Bound && Val < Bound;
}
585193323Sed
586224145Sdimbool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
587224145Sdim                                            X86ISelAddressMode &AM) {
588224145Sdim  int64_t Val = AM.Disp + Offset;
589224145Sdim  CodeModel::Model M = TM.getCodeModel();
590224145Sdim  if (Subtarget->is64Bit()) {
591224145Sdim    if (!X86::isOffsetSuitableForCodeModel(Val, M,
592224145Sdim                                           AM.hasSymbolicDisplacement()))
593224145Sdim      return true;
594224145Sdim    // In addition to the checks required for a register base, check that
595224145Sdim    // we do not try to use an unsafe Disp with a frame index.
596224145Sdim    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
597224145Sdim        !isDispSafeForFrameIndex(Val))
598224145Sdim      return true;
599224145Sdim  }
600224145Sdim  AM.Disp = Val;
601224145Sdim  return false;
602224145Sdim
603224145Sdim}
604224145Sdim
605218893Sdimbool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
606218893Sdim  SDValue Address = N->getOperand(1);
607245431Sdim
608218893Sdim  // load gs:0 -> GS segment register.
609218893Sdim  // load fs:0 -> FS segment register.
610218893Sdim  //
611193323Sed  // This optimization is valid because the GNU TLS model defines that
612193323Sed  // gs:0 (or fs:0 on X86-64) contains its own address.
613193323Sed  // For more information see http://people.redhat.com/drepper/tls.pdf
614218893Sdim  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
615218893Sdim    if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
616245431Sdim        Subtarget->isTargetLinux())
617218893Sdim      switch (N->getPointerInfo().getAddrSpace()) {
618218893Sdim      case 256:
619218893Sdim        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
620218893Sdim        return false;
621218893Sdim      case 257:
622218893Sdim        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
623218893Sdim        return false;
624218893Sdim      }
625245431Sdim
626193323Sed  return true;
627193323Sed}
628193323Sed
629195098Sed/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
630195098Sed/// into an addressing mode.  These wrap things that will resolve down into a
631195098Sed/// symbol reference.  If no match is possible, this returns true, otherwise it
632198090Srdivacky/// returns false.
633193323Sedbool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
634195098Sed  // If the addressing mode already has a symbol as the displacement, we can
635195098Sed  // never match another symbol.
636193323Sed  if (AM.hasSymbolicDisplacement())
637193323Sed    return true;
638193323Sed
639193323Sed  SDValue N0 = N.getOperand(0);
640198090Srdivacky  CodeModel::Model M = TM.getCodeModel();
641198090Srdivacky
642195098Sed  // Handle X86-64 rip-relative addresses.  We check this before checking direct
643195098Sed  // folding because RIP is preferable to non-RIP accesses.
644235633Sdim  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
645195098Sed      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
646195098Sed      // they cannot be folded into immediate fields.
647195098Sed      // FIXME: This can be improved for kernel and other models?
648235633Sdim      (M == CodeModel::Small || M == CodeModel::Kernel)) {
649235633Sdim    // Base and index reg must be 0 in order to use %rip as base.
650235633Sdim    if (AM.hasBaseOrIndexReg())
651235633Sdim      return true;
652195098Sed    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
653224145Sdim      X86ISelAddressMode Backup = AM;
654195098Sed      AM.GV = G->getGlobal();
655195098Sed      AM.SymbolFlags = G->getTargetFlags();
656224145Sdim      if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
657224145Sdim        AM = Backup;
658224145Sdim        return true;
659224145Sdim      }
660195098Sed    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
661224145Sdim      X86ISelAddressMode Backup = AM;
662195098Sed      AM.CP = CP->getConstVal();
663195098Sed      AM.Align = CP->getAlignment();
664195098Sed      AM.SymbolFlags = CP->getTargetFlags();
665224145Sdim      if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
666224145Sdim        AM = Backup;
667224145Sdim        return true;
668224145Sdim      }
669195098Sed    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
670195098Sed      AM.ES = S->getSymbol();
671195098Sed      AM.SymbolFlags = S->getTargetFlags();
672198892Srdivacky    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
673195098Sed      AM.JT = J->getIndex();
674195098Sed      AM.SymbolFlags = J->getTargetFlags();
675245431Sdim    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
676245431Sdim      X86ISelAddressMode Backup = AM;
677245431Sdim      AM.BlockAddr = BA->getBlockAddress();
678245431Sdim      AM.SymbolFlags = BA->getTargetFlags();
679245431Sdim      if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
680245431Sdim        AM = Backup;
681245431Sdim        return true;
682245431Sdim      }
683245431Sdim    } else
684245431Sdim      llvm_unreachable("Unhandled symbol reference node.");
685198090Srdivacky
686195098Sed    if (N.getOpcode() == X86ISD::WrapperRIP)
687195098Sed      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
688195098Sed    return false;
689195098Sed  }
690195098Sed
691195098Sed  // Handle the case when globals fit in our immediate field: This is true for
692235633Sdim  // X86-32 always and X86-64 when in -mcmodel=small mode.  In 64-bit
693235633Sdim  // mode, this only applies to a non-RIP-relative computation.
694195098Sed  if (!Subtarget->is64Bit() ||
695235633Sdim      M == CodeModel::Small || M == CodeModel::Kernel) {
696235633Sdim    assert(N.getOpcode() != X86ISD::WrapperRIP &&
697235633Sdim           "RIP-relative addressing already handled");
698195098Sed    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
699195098Sed      AM.GV = G->getGlobal();
700195098Sed      AM.Disp += G->getOffset();
701195098Sed      AM.SymbolFlags = G->getTargetFlags();
702195098Sed    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
703193323Sed      AM.CP = CP->getConstVal();
704193323Sed      AM.Align = CP->getAlignment();
705195098Sed      AM.Disp += CP->getOffset();
706195098Sed      AM.SymbolFlags = CP->getTargetFlags();
707195098Sed    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
708195098Sed      AM.ES = S->getSymbol();
709195098Sed      AM.SymbolFlags = S->getTargetFlags();
710198892Srdivacky    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
711195098Sed      AM.JT = J->getIndex();
712195098Sed      AM.SymbolFlags = J->getTargetFlags();
713245431Sdim    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
714245431Sdim      AM.BlockAddr = BA->getBlockAddress();
715245431Sdim      AM.Disp += BA->getOffset();
716245431Sdim      AM.SymbolFlags = BA->getTargetFlags();
717245431Sdim    } else
718245431Sdim      llvm_unreachable("Unhandled symbol reference node.");
719193323Sed    return false;
720193323Sed  }
721193323Sed
722193323Sed  return true;
723193323Sed}
724193323Sed
725193323Sed/// MatchAddress - Add the specified node to the specified addressing mode,
726193323Sed/// returning true if it cannot be done.  This just pattern matches for the
727193323Sed/// addressing mode.
728198090Srdivackybool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
729210299Sed  if (MatchAddressRecursively(N, AM, 0))
730198090Srdivacky    return true;
731198090Srdivacky
732198090Srdivacky  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
733198090Srdivacky  // a smaller encoding and avoids a scaled-index.
734198090Srdivacky  if (AM.Scale == 2 &&
735198090Srdivacky      AM.BaseType == X86ISelAddressMode::RegBase &&
736207618Srdivacky      AM.Base_Reg.getNode() == 0) {
737207618Srdivacky    AM.Base_Reg = AM.IndexReg;
738198090Srdivacky    AM.Scale = 1;
739198090Srdivacky  }
740198090Srdivacky
741198090Srdivacky  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
742198090Srdivacky  // because it has a smaller encoding.
743198090Srdivacky  // TODO: Which other code models can use this?
744198090Srdivacky  if (TM.getCodeModel() == CodeModel::Small &&
745198090Srdivacky      Subtarget->is64Bit() &&
746198090Srdivacky      AM.Scale == 1 &&
747198090Srdivacky      AM.BaseType == X86ISelAddressMode::RegBase &&
748207618Srdivacky      AM.Base_Reg.getNode() == 0 &&
749198090Srdivacky      AM.IndexReg.getNode() == 0 &&
750198090Srdivacky      AM.SymbolFlags == X86II::MO_NO_FLAG &&
751198090Srdivacky      AM.hasSymbolicDisplacement())
752207618Srdivacky    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
753198090Srdivacky
754198090Srdivacky  return false;
755198090Srdivacky}
756198090Srdivacky
757235633Sdim// Insert a node into the DAG at least before the Pos node's position. This
758235633Sdim// will reposition the node as needed, and will assign it a node ID that is <=
759235633Sdim// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
760235633Sdim// IDs! The selection DAG must no longer depend on their uniqueness when this
761235633Sdim// is used.
762235633Sdimstatic void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
763235633Sdim  if (N.getNode()->getNodeId() == -1 ||
764235633Sdim      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
765235633Sdim    DAG.RepositionNode(Pos.getNode(), N.getNode());
766235633Sdim    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
767235633Sdim  }
768235633Sdim}
769235633Sdim
770235633Sdim// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
771235633Sdim// allows us to convert the shift and and into an h-register extract and
772235633Sdim// a scaled index. Returns false if the simplification is performed.
773235633Sdimstatic bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
774235633Sdim                                      uint64_t Mask,
775235633Sdim                                      SDValue Shift, SDValue X,
776235633Sdim                                      X86ISelAddressMode &AM) {
777235633Sdim  if (Shift.getOpcode() != ISD::SRL ||
778235633Sdim      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
779235633Sdim      !Shift.hasOneUse())
780235633Sdim    return true;
781235633Sdim
782235633Sdim  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
783235633Sdim  if (ScaleLog <= 0 || ScaleLog >= 4 ||
784235633Sdim      Mask != (0xffu << ScaleLog))
785235633Sdim    return true;
786235633Sdim
787263509Sdim  MVT VT = N.getSimpleValueType();
788263509Sdim  SDLoc DL(N);
789235633Sdim  SDValue Eight = DAG.getConstant(8, MVT::i8);
790235633Sdim  SDValue NewMask = DAG.getConstant(0xff, VT);
791235633Sdim  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
792235633Sdim  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
793235633Sdim  SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
794235633Sdim  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
795235633Sdim
796235633Sdim  // Insert the new nodes into the topological ordering. We must do this in
797235633Sdim  // a valid topological ordering as nothing is going to go back and re-sort
798235633Sdim  // these nodes. We continually insert before 'N' in sequence as this is
799235633Sdim  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
800235633Sdim  // hierarchy left to express.
801235633Sdim  InsertDAGNode(DAG, N, Eight);
802235633Sdim  InsertDAGNode(DAG, N, Srl);
803235633Sdim  InsertDAGNode(DAG, N, NewMask);
804235633Sdim  InsertDAGNode(DAG, N, And);
805235633Sdim  InsertDAGNode(DAG, N, ShlCount);
806235633Sdim  InsertDAGNode(DAG, N, Shl);
807235633Sdim  DAG.ReplaceAllUsesWith(N, Shl);
808235633Sdim  AM.IndexReg = And;
809235633Sdim  AM.Scale = (1 << ScaleLog);
810235633Sdim  return false;
811235633Sdim}
812235633Sdim
813235633Sdim// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
814235633Sdim// allows us to fold the shift into this addressing mode. Returns false if the
815235633Sdim// transform succeeded.
816235633Sdimstatic bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
817235633Sdim                                        uint64_t Mask,
818235633Sdim                                        SDValue Shift, SDValue X,
819235633Sdim                                        X86ISelAddressMode &AM) {
820235633Sdim  if (Shift.getOpcode() != ISD::SHL ||
821235633Sdim      !isa<ConstantSDNode>(Shift.getOperand(1)))
822235633Sdim    return true;
823235633Sdim
824235633Sdim  // Not likely to be profitable if either the AND or SHIFT node has more
825235633Sdim  // than one use (unless all uses are for address computation). Besides,
826235633Sdim  // isel mechanism requires their node ids to be reused.
827235633Sdim  if (!N.hasOneUse() || !Shift.hasOneUse())
828235633Sdim    return true;
829235633Sdim
830235633Sdim  // Verify that the shift amount is something we can fold.
831235633Sdim  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
832235633Sdim  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
833235633Sdim    return true;
834235633Sdim
835263509Sdim  MVT VT = N.getSimpleValueType();
836263509Sdim  SDLoc DL(N);
837235633Sdim  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
838235633Sdim  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
839235633Sdim  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
840235633Sdim
841235633Sdim  // Insert the new nodes into the topological ordering. We must do this in
842235633Sdim  // a valid topological ordering as nothing is going to go back and re-sort
843235633Sdim  // these nodes. We continually insert before 'N' in sequence as this is
844235633Sdim  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
845235633Sdim  // hierarchy left to express.
846235633Sdim  InsertDAGNode(DAG, N, NewMask);
847235633Sdim  InsertDAGNode(DAG, N, NewAnd);
848235633Sdim  InsertDAGNode(DAG, N, NewShift);
849235633Sdim  DAG.ReplaceAllUsesWith(N, NewShift);
850235633Sdim
851235633Sdim  AM.Scale = 1 << ShiftAmt;
852235633Sdim  AM.IndexReg = NewAnd;
853235633Sdim  return false;
854235633Sdim}
855235633Sdim
856235633Sdim// Implement some heroics to detect shifts of masked values where the mask can
857235633Sdim// be replaced by extending the shift and undoing that in the addressing mode
858235633Sdim// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
859235633Sdim// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
860235633Sdim// the addressing mode. This results in code such as:
861235633Sdim//
862235633Sdim//   int f(short *y, int *lookup_table) {
863235633Sdim//     ...
864235633Sdim//     return *y + lookup_table[*y >> 11];
865235633Sdim//   }
866235633Sdim//
867235633Sdim// Turning into:
868235633Sdim//   movzwl (%rdi), %eax
869235633Sdim//   movl %eax, %ecx
870235633Sdim//   shrl $11, %ecx
871235633Sdim//   addl (%rsi,%rcx,4), %eax
872235633Sdim//
873235633Sdim// Instead of:
874235633Sdim//   movzwl (%rdi), %eax
875235633Sdim//   movl %eax, %ecx
876235633Sdim//   shrl $9, %ecx
877235633Sdim//   andl $124, %rcx
878235633Sdim//   addl (%rsi,%rcx), %eax
879235633Sdim//
880235633Sdim// Note that this function assumes the mask is provided as a mask *after* the
881235633Sdim// value is shifted. The input chain may or may not match that, but computing
882235633Sdim// such a mask is trivial.
883235633Sdimstatic bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
884235633Sdim                                    uint64_t Mask,
885235633Sdim                                    SDValue Shift, SDValue X,
886235633Sdim                                    X86ISelAddressMode &AM) {
887235633Sdim  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
888235633Sdim      !isa<ConstantSDNode>(Shift.getOperand(1)))
889235633Sdim    return true;
890235633Sdim
891235633Sdim  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
892263509Sdim  unsigned MaskLZ = countLeadingZeros(Mask);
893263509Sdim  unsigned MaskTZ = countTrailingZeros(Mask);
894235633Sdim
895235633Sdim  // The amount of shift we're trying to fit into the addressing mode is taken
896235633Sdim  // from the trailing zeros of the mask.
897235633Sdim  unsigned AMShiftAmt = MaskTZ;
898235633Sdim
899235633Sdim  // There is nothing we can do here unless the mask is removing some bits.
900235633Sdim  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
901235633Sdim  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
902235633Sdim
903235633Sdim  // We also need to ensure that mask is a continuous run of bits.
904235633Sdim  if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
905235633Sdim
906235633Sdim  // Scale the leading zero count down based on the actual size of the value.
907235633Sdim  // Also scale it down based on the size of the shift.
908263509Sdim  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
909235633Sdim
910235633Sdim  // The final check is to ensure that any masked out high bits of X are
911235633Sdim  // already known to be zero. Otherwise, the mask has a semantic impact
912235633Sdim  // other than masking out a couple of low bits. Unfortunately, because of
913235633Sdim  // the mask, zero extensions will be removed from operands in some cases.
914235633Sdim  // This code works extra hard to look through extensions because we can
915235633Sdim  // replace them with zero extensions cheaply if necessary.
916235633Sdim  bool ReplacingAnyExtend = false;
917235633Sdim  if (X.getOpcode() == ISD::ANY_EXTEND) {
918263509Sdim    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
919263509Sdim                          X.getOperand(0).getSimpleValueType().getSizeInBits();
920235633Sdim    // Assume that we'll replace the any-extend with a zero-extend, and
921235633Sdim    // narrow the search to the extended value.
922235633Sdim    X = X.getOperand(0);
923235633Sdim    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
924235633Sdim    ReplacingAnyExtend = true;
925235633Sdim  }
926263509Sdim  APInt MaskedHighBits =
927263509Sdim    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
928235633Sdim  APInt KnownZero, KnownOne;
929235633Sdim  DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
930235633Sdim  if (MaskedHighBits != KnownZero) return true;
931235633Sdim
932235633Sdim  // We've identified a pattern that can be transformed into a single shift
933235633Sdim  // and an addressing mode. Make it so.
934263509Sdim  MVT VT = N.getSimpleValueType();
935235633Sdim  if (ReplacingAnyExtend) {
936235633Sdim    assert(X.getValueType() != VT);
937235633Sdim    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
938263509Sdim    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
939235633Sdim    InsertDAGNode(DAG, N, NewX);
940235633Sdim    X = NewX;
941235633Sdim  }
942263509Sdim  SDLoc DL(N);
943235633Sdim  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
944235633Sdim  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
945235633Sdim  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
946235633Sdim  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
947235633Sdim
948235633Sdim  // Insert the new nodes into the topological ordering. We must do this in
949235633Sdim  // a valid topological ordering as nothing is going to go back and re-sort
950235633Sdim  // these nodes. We continually insert before 'N' in sequence as this is
951235633Sdim  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
952235633Sdim  // hierarchy left to express.
953235633Sdim  InsertDAGNode(DAG, N, NewSRLAmt);
954235633Sdim  InsertDAGNode(DAG, N, NewSRL);
955235633Sdim  InsertDAGNode(DAG, N, NewSHLAmt);
956235633Sdim  InsertDAGNode(DAG, N, NewSHL);
957235633Sdim  DAG.ReplaceAllUsesWith(N, NewSHL);
958235633Sdim
959235633Sdim  AM.Scale = 1 << AMShiftAmt;
960235633Sdim  AM.IndexReg = NewSRL;
961235633Sdim  return false;
962235633Sdim}
963235633Sdim
964198090Srdivackybool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
965198090Srdivacky                                              unsigned Depth) {
966263509Sdim  SDLoc dl(N);
967198090Srdivacky  DEBUG({
968202375Srdivacky      dbgs() << "MatchAddress: ";
969198090Srdivacky      AM.dump();
970198090Srdivacky    });
971193323Sed  // Limit recursion.
972193323Sed  if (Depth > 5)
973193323Sed    return MatchAddressBase(N, AM);
974198090Srdivacky
975195098Sed  // If this is already a %rip relative address, we can only merge immediates
976195098Sed  // into it.  Instead of handling this in every case, we handle it here.
977193323Sed  // RIP relative addressing: %rip + 32-bit displacement!
978195098Sed  if (AM.isRIPRelative()) {
979195098Sed    // FIXME: JumpTable and ExternalSymbol address currently don't like
980195098Sed    // displacements.  It isn't very important, but this should be fixed for
981195098Sed    // consistency.
982195098Sed    if (!AM.ES && AM.JT != -1) return true;
983198090Srdivacky
984224145Sdim    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
985224145Sdim      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
986193323Sed        return false;
987193323Sed    return true;
988193323Sed  }
989193323Sed
990193323Sed  switch (N.getOpcode()) {
991193323Sed  default: break;
992193323Sed  case ISD::Constant: {
993193323Sed    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
994224145Sdim    if (!FoldOffsetIntoAddress(Val, AM))
995193323Sed      return false;
996193323Sed    break;
997193323Sed  }
998193323Sed
999193323Sed  case X86ISD::Wrapper:
1000195098Sed  case X86ISD::WrapperRIP:
1001193323Sed    if (!MatchWrapper(N, AM))
1002193323Sed      return false;
1003193323Sed    break;
1004193323Sed
1005193323Sed  case ISD::LOAD:
1006218893Sdim    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
1007193323Sed      return false;
1008193323Sed    break;
1009193323Sed
1010193323Sed  case ISD::FrameIndex:
1011224145Sdim    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1012224145Sdim        AM.Base_Reg.getNode() == 0 &&
1013224145Sdim        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1014193323Sed      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1015207618Srdivacky      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1016193323Sed      return false;
1017193323Sed    }
1018193323Sed    break;
1019193323Sed
1020193323Sed  case ISD::SHL:
1021195098Sed    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
1022193323Sed      break;
1023245431Sdim
1024193323Sed    if (ConstantSDNode
1025193323Sed          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
1026193323Sed      unsigned Val = CN->getZExtValue();
1027198090Srdivacky      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1028198090Srdivacky      // that the base operand remains free for further matching. If
1029198090Srdivacky      // the base doesn't end up getting used, a post-processing step
1030198090Srdivacky      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1031193323Sed      if (Val == 1 || Val == 2 || Val == 3) {
1032193323Sed        AM.Scale = 1 << Val;
1033193323Sed        SDValue ShVal = N.getNode()->getOperand(0);
1034193323Sed
1035193323Sed        // Okay, we know that we have a scale by now.  However, if the scaled
1036193323Sed        // value is an add of something and a constant, we can fold the
1037193323Sed        // constant into the disp field here.
1038218893Sdim        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1039193323Sed          AM.IndexReg = ShVal.getNode()->getOperand(0);
1040193323Sed          ConstantSDNode *AddVal =
1041193323Sed            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
1042245431Sdim          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1043224145Sdim          if (!FoldOffsetIntoAddress(Disp, AM))
1044224145Sdim            return false;
1045193323Sed        }
1046224145Sdim
1047224145Sdim        AM.IndexReg = ShVal;
1048193323Sed        return false;
1049193323Sed      }
1050252723Sdim    }
1051193323Sed    break;
1052193323Sed
1053235633Sdim  case ISD::SRL: {
1054235633Sdim    // Scale must not be used already.
1055235633Sdim    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1056235633Sdim
1057235633Sdim    SDValue And = N.getOperand(0);
1058235633Sdim    if (And.getOpcode() != ISD::AND) break;
1059235633Sdim    SDValue X = And.getOperand(0);
1060235633Sdim
1061235633Sdim    // We only handle up to 64-bit values here as those are what matter for
1062235633Sdim    // addressing mode optimizations.
1063263509Sdim    if (X.getSimpleValueType().getSizeInBits() > 64) break;
1064235633Sdim
1065235633Sdim    // The mask used for the transform is expected to be post-shift, but we
1066235633Sdim    // found the shift first so just apply the shift to the mask before passing
1067235633Sdim    // it down.
1068235633Sdim    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1069235633Sdim        !isa<ConstantSDNode>(And.getOperand(1)))
1070235633Sdim      break;
1071235633Sdim    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1072235633Sdim
1073235633Sdim    // Try to fold the mask and shift into the scale, and return false if we
1074235633Sdim    // succeed.
1075235633Sdim    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1076235633Sdim      return false;
1077235633Sdim    break;
1078235633Sdim  }
1079235633Sdim
1080193323Sed  case ISD::SMUL_LOHI:
1081193323Sed  case ISD::UMUL_LOHI:
1082193323Sed    // A mul_lohi where we need the low part can be folded as a plain multiply.
1083193323Sed    if (N.getResNo() != 0) break;
1084193323Sed    // FALL THROUGH
1085193323Sed  case ISD::MUL:
1086193323Sed  case X86ISD::MUL_IMM:
1087193323Sed    // X*[3,5,9] -> X+X*[2,4,8]
1088193323Sed    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1089207618Srdivacky        AM.Base_Reg.getNode() == 0 &&
1090195098Sed        AM.IndexReg.getNode() == 0) {
1091193323Sed      if (ConstantSDNode
1092193323Sed            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
1093193323Sed        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1094193323Sed            CN->getZExtValue() == 9) {
1095193323Sed          AM.Scale = unsigned(CN->getZExtValue())-1;
1096193323Sed
1097193323Sed          SDValue MulVal = N.getNode()->getOperand(0);
1098193323Sed          SDValue Reg;
1099193323Sed
1100193323Sed          // Okay, we know that we have a scale by now.  However, if the scaled
1101193323Sed          // value is an add of something and a constant, we can fold the
1102193323Sed          // constant into the disp field here.
1103193323Sed          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1104193323Sed              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
1105193323Sed            Reg = MulVal.getNode()->getOperand(0);
1106193323Sed            ConstantSDNode *AddVal =
1107193323Sed              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
1108224145Sdim            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1109224145Sdim            if (FoldOffsetIntoAddress(Disp, AM))
1110193323Sed              Reg = N.getNode()->getOperand(0);
1111193323Sed          } else {
1112193323Sed            Reg = N.getNode()->getOperand(0);
1113193323Sed          }
1114193323Sed
1115207618Srdivacky          AM.IndexReg = AM.Base_Reg = Reg;
1116193323Sed          return false;
1117193323Sed        }
1118193323Sed    }
1119193323Sed    break;
1120193323Sed
1121193323Sed  case ISD::SUB: {
1122193323Sed    // Given A-B, if A can be completely folded into the address and
1123193323Sed    // the index field with the index field unused, use -B as the index.
1124193323Sed    // This is a win if a has multiple parts that can be folded into
1125193323Sed    // the address. Also, this saves a mov if the base register has
1126193323Sed    // other uses, since it avoids a two-address sub instruction, however
1127193323Sed    // it costs an additional mov if the index register has other uses.
1128193323Sed
1129210299Sed    // Add an artificial use to this node so that we can keep track of
1130210299Sed    // it if it gets CSE'd with a different node.
1131210299Sed    HandleSDNode Handle(N);
1132210299Sed
1133193323Sed    // Test if the LHS of the sub can be folded.
1134193323Sed    X86ISelAddressMode Backup = AM;
1135210299Sed    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
1136193323Sed      AM = Backup;
1137193323Sed      break;
1138193323Sed    }
1139193323Sed    // Test if the index field is free for use.
1140195098Sed    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1141193323Sed      AM = Backup;
1142193323Sed      break;
1143193323Sed    }
1144205407Srdivacky
1145193323Sed    int Cost = 0;
1146210299Sed    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
1147193323Sed    // If the RHS involves a register with multiple uses, this
1148193323Sed    // transformation incurs an extra mov, due to the neg instruction
1149193323Sed    // clobbering its operand.
1150193323Sed    if (!RHS.getNode()->hasOneUse() ||
1151193323Sed        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1152193323Sed        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1153193323Sed        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1154193323Sed        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1155193323Sed         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
1156193323Sed      ++Cost;
1157193323Sed    // If the base is a register with multiple uses, this
1158193323Sed    // transformation may save a mov.
1159193323Sed    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
1160207618Srdivacky         AM.Base_Reg.getNode() &&
1161207618Srdivacky         !AM.Base_Reg.getNode()->hasOneUse()) ||
1162193323Sed        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1163193323Sed      --Cost;
1164193323Sed    // If the folded LHS was interesting, this transformation saves
1165193323Sed    // address arithmetic.
1166193323Sed    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1167193323Sed        ((AM.Disp != 0) && (Backup.Disp == 0)) +
1168193323Sed        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1169193323Sed      --Cost;
1170193323Sed    // If it doesn't look like it may be an overall win, don't do it.
1171193323Sed    if (Cost >= 0) {
1172193323Sed      AM = Backup;
1173193323Sed      break;
1174193323Sed    }
1175193323Sed
1176193323Sed    // Ok, the transformation is legal and appears profitable. Go for it.
1177193323Sed    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
1178193323Sed    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1179193323Sed    AM.IndexReg = Neg;
1180193323Sed    AM.Scale = 1;
1181193323Sed
1182193323Sed    // Insert the new nodes into the topological ordering.
1183235633Sdim    InsertDAGNode(*CurDAG, N, Zero);
1184235633Sdim    InsertDAGNode(*CurDAG, N, Neg);
1185193323Sed    return false;
1186193323Sed  }
1187193323Sed
1188193323Sed  case ISD::ADD: {
1189210299Sed    // Add an artificial use to this node so that we can keep track of
1190210299Sed    // it if it gets CSE'd with a different node.
1191210299Sed    HandleSDNode Handle(N);
1192210299Sed
1193193323Sed    X86ISelAddressMode Backup = AM;
1194218893Sdim    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1195218893Sdim        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1196210299Sed      return false;
1197210299Sed    AM = Backup;
1198245431Sdim
1199205407Srdivacky    // Try again after commuting the operands.
1200218893Sdim    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
1201218893Sdim        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1202210299Sed      return false;
1203193323Sed    AM = Backup;
1204193323Sed
1205193323Sed    // If we couldn't fold both operands into the address at the same time,
1206193323Sed    // see if we can just put each operand into a register and fold at least
1207193323Sed    // the add.
1208193323Sed    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1209207618Srdivacky        !AM.Base_Reg.getNode() &&
1210195098Sed        !AM.IndexReg.getNode()) {
1211218893Sdim      N = Handle.getValue();
1212218893Sdim      AM.Base_Reg = N.getOperand(0);
1213218893Sdim      AM.IndexReg = N.getOperand(1);
1214193323Sed      AM.Scale = 1;
1215193323Sed      return false;
1216193323Sed    }
1217218893Sdim    N = Handle.getValue();
1218193323Sed    break;
1219193323Sed  }
1220193323Sed
1221193323Sed  case ISD::OR:
1222193323Sed    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
1223218893Sdim    if (CurDAG->isBaseWithConstantOffset(N)) {
1224193323Sed      X86ISelAddressMode Backup = AM;
1225207618Srdivacky      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
1226205407Srdivacky
1227193323Sed      // Start with the LHS as an addr mode.
1228210299Sed      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1229224145Sdim          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
1230193323Sed        return false;
1231193323Sed      AM = Backup;
1232193323Sed    }
1233193323Sed    break;
1234245431Sdim
1235193323Sed  case ISD::AND: {
1236193323Sed    // Perform some heroic transforms on an and of a constant-count shift
1237193323Sed    // with a constant to enable use of the scaled offset field.
1238193323Sed
1239193323Sed    // Scale must not be used already.
1240193323Sed    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1241193323Sed
1242235633Sdim    SDValue Shift = N.getOperand(0);
1243235633Sdim    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1244193323Sed    SDValue X = Shift.getOperand(0);
1245193323Sed
1246235633Sdim    // We only handle up to 64-bit values here as those are what matter for
1247235633Sdim    // addressing mode optimizations.
1248263509Sdim    if (X.getSimpleValueType().getSizeInBits() > 64) break;
1249193323Sed
1250235633Sdim    if (!isa<ConstantSDNode>(N.getOperand(1)))
1251235633Sdim      break;
1252235633Sdim    uint64_t Mask = N.getConstantOperandVal(1);
1253193323Sed
1254235633Sdim    // Try to fold the mask and shift into an extract and scale.
1255235633Sdim    if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1256235633Sdim      return false;
1257193323Sed
1258235633Sdim    // Try to fold the mask and shift directly into the scale.
1259235633Sdim    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1260235633Sdim      return false;
1261193323Sed
1262235633Sdim    // Try to swap the mask and shift to place shifts which can be done as
1263235633Sdim    // a scale on the outside of the mask.
1264235633Sdim    if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1265235633Sdim      return false;
1266235633Sdim    break;
1267193323Sed  }
1268193323Sed  }
1269193323Sed
1270193323Sed  return MatchAddressBase(N, AM);
1271193323Sed}
1272193323Sed
1273193323Sed/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1274193323Sed/// specified addressing mode without any further recursion.
1275193323Sedbool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1276193323Sed  // Is the base register already occupied?
1277207618Srdivacky  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1278193323Sed    // If so, check to see if the scale index register is set.
1279195098Sed    if (AM.IndexReg.getNode() == 0) {
1280193323Sed      AM.IndexReg = N;
1281193323Sed      AM.Scale = 1;
1282193323Sed      return false;
1283193323Sed    }
1284193323Sed
1285193323Sed    // Otherwise, we cannot select it.
1286193323Sed    return true;
1287193323Sed  }
1288193323Sed
1289193323Sed  // Default, generate it as a register.
1290193323Sed  AM.BaseType = X86ISelAddressMode::RegBase;
1291207618Srdivacky  AM.Base_Reg = N;
1292193323Sed  return false;
1293193323Sed}
1294193323Sed
1295193323Sed/// SelectAddr - returns true if it is able pattern match an addressing mode.
1296193323Sed/// It returns the operands which make up the maximal addressing mode it can
1297193323Sed/// match by reference.
1298218893Sdim///
1299218893Sdim/// Parent is the parent node of the addr operand that is being matched.  It
1300218893Sdim/// is always a load, store, atomic node, or null.  It is only null when
1301218893Sdim/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  // If the parent is a memory node, recover segment-override information from
  // its address space before matching the address expression itself.
  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  // Note the inverted convention: MatchAddress returns true when it could
  // NOT match, in which case this selector fails.
  if (MatchAddress(N, AM))
    return false;

  MVT VT = N.getSimpleValueType();
  // Populate any unused base/index slot with the "no register" (%noreg)
  // placeholder so all five address operands are always present.
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}
1339193323Sed
1340193323Sed/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
1341193323Sed/// match a load whose top elements are either undef or zeros.  The load flavor
1342193323Sed/// is derived from the type of N, which is either v4f32 or v2f64.
1343204642Srdivacky///
1344204642Srdivacky/// We also return:
1345204642Srdivacky///   PatternChainNode: this is the matched node that has a chain input and
1346204642Srdivacky///   output.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  // Case 1: (scalar_to_vector (load ...)).  Fold the load if it is a plain
  // non-extending load with exactly one use, and folding into Root is both
  // profitable and legal.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      // Match the load's address; failure here fails the whole pattern.
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  // Case 2: (vzext_movl (scalar_to_vector (load ...))); every intermediate
  // node must be single-use so the fold does not duplicate work.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    // Report the load as the chain-carrying node of the matched pattern.
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}
1384193323Sed
1385193323Sed
1386263509Sdimbool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
1387263509Sdim  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1388263509Sdim    uint64_t ImmVal = CN->getZExtValue();
1389263509Sdim    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
1390263509Sdim      return false;
1391263509Sdim
1392263509Sdim    Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
1393263509Sdim    return true;
1394263509Sdim  }
1395263509Sdim
1396263509Sdim  // In static codegen with small code model, we can get the address of a label
1397263509Sdim  // into a register with 'movl'. TableGen has already made sure we're looking
1398263509Sdim  // at a label of some kind.
1399263509Sdim  assert(N->getOpcode() == X86ISD::Wrapper &&
1400263509Sdim         "Unexpected node type for MOV32ri64");
1401263509Sdim  N = N.getOperand(0);
1402263509Sdim
1403263509Sdim  if (N->getOpcode() != ISD::TargetConstantPool &&
1404263509Sdim      N->getOpcode() != ISD::TargetJumpTable &&
1405263509Sdim      N->getOpcode() != ISD::TargetGlobalAddress &&
1406263509Sdim      N->getOpcode() != ISD::TargetExternalSymbol &&
1407263509Sdim      N->getOpcode() != ISD::TargetBlockAddress)
1408263509Sdim    return false;
1409263509Sdim
1410263509Sdim  Imm = N;
1411263509Sdim  return TM.getCodeModel() == CodeModel::Small;
1412263509Sdim}
1413263509Sdim
1414263509Sdimbool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
1415263509Sdim                                         SDValue &Scale, SDValue &Index,
1416263509Sdim                                         SDValue &Disp, SDValue &Segment) {
1417263509Sdim  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1418263509Sdim    return false;
1419263509Sdim
1420263509Sdim  SDLoc DL(N);
1421263509Sdim  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1422263509Sdim  if (RN && RN->getReg() == 0)
1423263509Sdim    Base = CurDAG->getRegister(0, MVT::i64);
1424263509Sdim  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
1425263509Sdim    // Base could already be %rip, particularly in the x32 ABI.
1426263509Sdim    Base = SDValue(CurDAG->getMachineNode(
1427263509Sdim                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1428263509Sdim                       CurDAG->getTargetConstant(0, MVT::i64),
1429263509Sdim                       Base,
1430263509Sdim                       CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
1431263509Sdim                   0);
1432263509Sdim  }
1433263509Sdim
1434263509Sdim  RN = dyn_cast<RegisterSDNode>(Index);
1435263509Sdim  if (RN && RN->getReg() == 0)
1436263509Sdim    Index = CurDAG->getRegister(0, MVT::i64);
1437263509Sdim  else {
1438263509Sdim    assert(Index.getValueType() == MVT::i32 &&
1439263509Sdim           "Expect to be extending 32-bit registers for use in LEA");
1440263509Sdim    Index = SDValue(CurDAG->getMachineNode(
1441263509Sdim                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1442263509Sdim                        CurDAG->getTargetConstant(0, MVT::i64),
1443263509Sdim                        Index,
1444263509Sdim                        CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
1445263509Sdim                    0);
1446263509Sdim  }
1447263509Sdim
1448263509Sdim  return true;
1449263509Sdim}
1450263509Sdim
1451193323Sed/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1452193323Sed/// mode it matches can be cost effectively emitted as an LEA instruction.
1453218893Sdimbool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
1454193323Sed                                    SDValue &Base, SDValue &Scale,
1455210299Sed                                    SDValue &Index, SDValue &Disp,
1456210299Sed                                    SDValue &Segment) {
1457193323Sed  X86ISelAddressMode AM;
1458193323Sed
1459193323Sed  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1460193323Sed  // segments.
1461193323Sed  SDValue Copy = AM.Segment;
1462193323Sed  SDValue T = CurDAG->getRegister(0, MVT::i32);
1463193323Sed  AM.Segment = T;
1464193323Sed  if (MatchAddress(N, AM))
1465193323Sed    return false;
1466193323Sed  assert (T == AM.Segment);
1467193323Sed  AM.Segment = Copy;
1468193323Sed
1469263509Sdim  MVT VT = N.getSimpleValueType();
1470193323Sed  unsigned Complexity = 0;
1471193323Sed  if (AM.BaseType == X86ISelAddressMode::RegBase)
1472207618Srdivacky    if (AM.Base_Reg.getNode())
1473193323Sed      Complexity = 1;
1474193323Sed    else
1475207618Srdivacky      AM.Base_Reg = CurDAG->getRegister(0, VT);
1476193323Sed  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1477193323Sed    Complexity = 4;
1478193323Sed
1479193323Sed  if (AM.IndexReg.getNode())
1480193323Sed    Complexity++;
1481193323Sed  else
1482193323Sed    AM.IndexReg = CurDAG->getRegister(0, VT);
1483193323Sed
1484193323Sed  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1485193323Sed  // a simple shift.
1486193323Sed  if (AM.Scale > 1)
1487193323Sed    Complexity++;
1488193323Sed
1489193323Sed  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1490193323Sed  // to a LEA. This is determined with some expermentation but is by no means
1491193323Sed  // optimal (especially for code size consideration). LEA is nice because of
1492193323Sed  // its three-address nature. Tweak the cost function again when we can run
1493193323Sed  // convertToThreeAddress() at register allocation time.
1494193323Sed  if (AM.hasSymbolicDisplacement()) {
1495193323Sed    // For X86-64, we should always use lea to materialize RIP relative
1496193323Sed    // addresses.
1497193323Sed    if (Subtarget->is64Bit())
1498193323Sed      Complexity = 4;
1499193323Sed    else
1500193323Sed      Complexity += 2;
1501193323Sed  }
1502193323Sed
1503207618Srdivacky  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1504193323Sed    Complexity++;
1505193323Sed
1506198090Srdivacky  // If it isn't worth using an LEA, reject it.
1507198090Srdivacky  if (Complexity <= 2)
1508198090Srdivacky    return false;
1509245431Sdim
1510198090Srdivacky  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1511198090Srdivacky  return true;
1512193323Sed}
1513193323Sed
1514194612Sed/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) {
  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

  // Build the address mode directly from the global: symbol + offset, no
  // base register.
  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
  AM.SymbolFlags = GA->getTargetFlags();

  if (N.getValueType() == MVT::i32) {
    // 32-bit TLS sequences address relative to %ebx (presumably the PIC/GOT
    // base expected by the TLS lowering -- confirm against X86ISelLowering).
    AM.Scale = 1;
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
  } else {
    // 64-bit: no index register.
    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
  }

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}
1537194612Sed
1538194612Sed
1539202375Srdivackybool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
1540193323Sed                                  SDValue &Base, SDValue &Scale,
1541193323Sed                                  SDValue &Index, SDValue &Disp,
1542193323Sed                                  SDValue &Segment) {
1543204642Srdivacky  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1544204642Srdivacky      !IsProfitableToFold(N, P, P) ||
1545207618Srdivacky      !IsLegalToFold(N, P, P, OptLevel))
1546204642Srdivacky    return false;
1547245431Sdim
1548218893Sdim  return SelectAddr(N.getNode(),
1549218893Sdim                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
1550193323Sed}
1551193323Sed
1552193323Sed/// getGlobalBaseReg - Return an SDNode that returns the value of
1553193323Sed/// the global base register. Output instructions required to
1554193323Sed/// initialize the global base register, if necessary.
1555193323Sed///
1556193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1557193399Sed  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1558263509Sdim  return CurDAG->getRegister(GlobalBaseReg,
1559263509Sdim                             getTargetLowering()->getPointerTy()).getNode();
1560193323Sed}
1561193323Sed
/// SelectAtomic64 - Select a pseudo (Opc) for a 64-bit atomic operation whose
/// value operand arrives as two 32-bit halves (In2L/In2H, per the L/H naming).
/// Returns the new machine node, or NULL if the address cannot be matched.
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);   // address operand
  SDValue In2L = Node->getOperand(2);  // low half of the value
  SDValue In2H = Node->getOperand(3);  // high half of the value

  // Decompose the address into the usual five operands; bail if impossible.
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  // Carry the memory operand over so the machine instr keeps alias info.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
  // Result types: two i32 halves plus the chain.
  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
                                           MVT::i32, MVT::i32, MVT::Other, Ops);
  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
  return ResNode;
}
1579193323Sed
1580245431Sdim/// Atomic opcode table
1581245431Sdim///
// Row index into AtomicOpcTbl; the order must match the table's initializer.
enum AtomicOpc {
  ADD,  // atomic add
  SUB,  // atomic subtract
  INC,  // add one (unary; no immediate/register source forms)
  DEC,  // subtract one (unary; no immediate/register source forms)
  OR,   // atomic bitwise or
  AND,  // atomic bitwise and
  XOR,  // atomic bitwise xor
  AtomicOpcEnd  // sentinel: number of operations
};
1592223017Sdim
// Column index into AtomicOpcTbl: operand width and source-operand form.
// "SextConstant" columns select the sign-extended imm8 encodings (imm32 for
// the i64 "ConstantI64" column), matching the *mi8/*mi32 opcode suffixes.
enum AtomicSz {
  ConstantI8,       // i8 destination, immediate source
  I8,               // i8 destination, register source
  SextConstantI16,  // i16, imm8 sign-extended
  ConstantI16,      // i16, imm16
  I16,              // i16, register
  SextConstantI32,  // i32, imm8 sign-extended
  ConstantI32,      // i32, imm32
  I32,              // i32, register
  SextConstantI64,  // i64, imm8 sign-extended
  ConstantI64,      // i64, imm32 sign-extended
  I64,              // i64, register
  AtomicSzEnd       // sentinel: number of forms
};
1607223017Sdim
// AtomicOpcTbl - LOCK-prefixed memory-destination opcodes, indexed first by
// AtomicOpc (row) and then by AtomicSz (column).  A zero entry means no such
// encoding exists: INC and DEC are unary, so only their register/memory
// forms (the I8/I16/I32/I64 columns) are populated.
static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
  { // ADD
    X86::LOCK_ADD8mi,
    X86::LOCK_ADD8mr,
    X86::LOCK_ADD16mi8,
    X86::LOCK_ADD16mi,
    X86::LOCK_ADD16mr,
    X86::LOCK_ADD32mi8,
    X86::LOCK_ADD32mi,
    X86::LOCK_ADD32mr,
    X86::LOCK_ADD64mi8,
    X86::LOCK_ADD64mi32,
    X86::LOCK_ADD64mr,
  },
  { // SUB
    X86::LOCK_SUB8mi,
    X86::LOCK_SUB8mr,
    X86::LOCK_SUB16mi8,
    X86::LOCK_SUB16mi,
    X86::LOCK_SUB16mr,
    X86::LOCK_SUB32mi8,
    X86::LOCK_SUB32mi,
    X86::LOCK_SUB32mr,
    X86::LOCK_SUB64mi8,
    X86::LOCK_SUB64mi32,
    X86::LOCK_SUB64mr,
  },
  { // INC (unary: only the register/memory columns exist)
    0,
    X86::LOCK_INC8m,
    0,
    0,
    X86::LOCK_INC16m,
    0,
    0,
    X86::LOCK_INC32m,
    0,
    0,
    X86::LOCK_INC64m,
  },
  { // DEC (unary: only the register/memory columns exist)
    0,
    X86::LOCK_DEC8m,
    0,
    0,
    X86::LOCK_DEC16m,
    0,
    0,
    X86::LOCK_DEC32m,
    0,
    0,
    X86::LOCK_DEC64m,
  },
  { // OR
    X86::LOCK_OR8mi,
    X86::LOCK_OR8mr,
    X86::LOCK_OR16mi8,
    X86::LOCK_OR16mi,
    X86::LOCK_OR16mr,
    X86::LOCK_OR32mi8,
    X86::LOCK_OR32mi,
    X86::LOCK_OR32mr,
    X86::LOCK_OR64mi8,
    X86::LOCK_OR64mi32,
    X86::LOCK_OR64mr,
  },
  { // AND
    X86::LOCK_AND8mi,
    X86::LOCK_AND8mr,
    X86::LOCK_AND16mi8,
    X86::LOCK_AND16mi,
    X86::LOCK_AND16mr,
    X86::LOCK_AND32mi8,
    X86::LOCK_AND32mi,
    X86::LOCK_AND32mr,
    X86::LOCK_AND64mi8,
    X86::LOCK_AND64mi32,
    X86::LOCK_AND64mr,
  },
  { // XOR
    X86::LOCK_XOR8mi,
    X86::LOCK_XOR8mr,
    X86::LOCK_XOR16mi8,
    X86::LOCK_XOR16mi,
    X86::LOCK_XOR16mr,
    X86::LOCK_XOR32mi8,
    X86::LOCK_XOR32mi,
    X86::LOCK_XOR32mr,
    X86::LOCK_XOR64mi8,
    X86::LOCK_XOR64mi32,
    X86::LOCK_XOR64mr,
  }
};
1701223017Sdim
1702245431Sdim// Return the target constant operand for atomic-load-op and do simple
1703245431Sdim// translations, such as from atomic-load-add to lock-sub. The return value is
1704245431Sdim// one of the following 3 cases:
1705245431Sdim// + target-constant, the operand could be supported as a target constant.
1706245431Sdim// + empty, the operand is not needed any more with the new op selected.
1707245431Sdim// + non-empty, otherwise.
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
                                                SDLoc dl,
                                                enum AtomicOpc &Op, MVT NVT,
                                                SDValue Val) {
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
    int64_t CNVal = CN->getSExtValue();
    // Quit if not 32-bit imm.
    if ((int32_t)CNVal != CNVal)
      return Val;
    // For atomic-load-add, we could do some optimizations.
    if (Op == ADD) {
      // Translate to INC/DEC if ADD by 1 or -1.
      if ((CNVal == 1) || (CNVal == -1)) {
        Op = (CNVal == 1) ? INC : DEC;
        // No more constant operand after being translated into INC/DEC.
        return SDValue();
      }
      // Translate to SUB if ADD by negative value.
      if (CNVal < 0) {
        Op = SUB;
        CNVal = -CNVal;
      }
    }
    // Constant fits in 32 bits: hand back a target constant of the memory VT.
    return CurDAG->getTargetConstant(CNVal, NVT);
  }

  // If the value operand is single-used, try to optimize it.
  if (Op == ADD && Val.hasOneUse()) {
    // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
    if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
      Op = SUB;
      return Val.getOperand(1);
    }
    // A special case for i16, which needs truncating as, in most cases, it's
    // promoted to i32. We will translate
    // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
    if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
        Val.getOperand(0).getOpcode() == ISD::SUB &&
        X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
      Op = SUB;
      Val = Val.getOperand(0);
      // Pull the 16-bit subregister out of the negated (promoted) value.
      return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
                                            Val.getOperand(1));
    }
  }

  // No simplification applied: return the operand unchanged.
  return Val;
}
1756245431Sdim
/// SelectAtomicLoadArith - Select an atomic-load-arith node whose loaded
/// result is unused into a LOCK-prefixed memory-destination instruction.
/// Returns the merged result node, or 0 to fall back to normal selection
/// (result is used, address doesn't match, or no lock form for the op/type).
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
  // The lock form discards the loaded value, so it only applies when value 0
  // (the load result) has no users.
  if (Node->hasAnyUseOfValue(0))
    return 0;

  SDLoc dl(Node);

  // Optimize common patterns for __sync_or_and_fetch and similar arith
  // operations where the result is not used. This allows us to use the "lock"
  // version of the arithmetic instruction.
  SDValue Chain = Node->getOperand(0);
  SDValue Ptr = Node->getOperand(1);
  SDValue Val = Node->getOperand(2);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return 0;

  // Which index into the table.
  enum AtomicOpc Op;
  switch (Node->getOpcode()) {
    default:
      return 0;
    case ISD::ATOMIC_LOAD_OR:
      Op = OR;
      break;
    case ISD::ATOMIC_LOAD_AND:
      Op = AND;
      break;
    case ISD::ATOMIC_LOAD_XOR:
      Op = XOR;
      break;
    case ISD::ATOMIC_LOAD_ADD:
      Op = ADD;
      break;
  }

  // May rewrite Op (e.g. ADD of -1 -> DEC) and/or simplify Val; an empty Val
  // means the selected op takes no source operand (INC/DEC).
  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
  bool isUnOp = !Val.getNode();
  bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);

  // Pick the opcode column from the width and whether the source is a
  // constant (preferring the sign-extended imm8 encodings when they fit).
  unsigned Opc = 0;
  switch (NVT.SimpleTy) {
    default: return 0;
    case MVT::i8:
      if (isCN)
        Opc = AtomicOpcTbl[Op][ConstantI8];
      else
        Opc = AtomicOpcTbl[Op][I8];
      break;
    case MVT::i16:
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = AtomicOpcTbl[Op][SextConstantI16];
        else
          Opc = AtomicOpcTbl[Op][ConstantI16];
      } else
        Opc = AtomicOpcTbl[Op][I16];
      break;
    case MVT::i32:
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = AtomicOpcTbl[Op][SextConstantI32];
        else
          Opc = AtomicOpcTbl[Op][ConstantI32];
      } else
        Opc = AtomicOpcTbl[Op][I32];
      break;
    case MVT::i64:
      Opc = AtomicOpcTbl[Op][I64];
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = AtomicOpcTbl[Op][SextConstantI64];
        else if (i64immSExt32(Val.getNode()))
          Opc = AtomicOpcTbl[Op][ConstantI64];
      }
      break;
  }

  assert(Opc != 0 && "Invalid arith lock transform!");

  // The node's value result is unused (checked above), so return UNDEF for
  // it and merge with the new instruction's chain.
  SDValue Ret;
  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                 dl, NVT), 0);
  // Preserve the memory operand on the machine node.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  if (isUnOp) {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
  } else {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
  }
  cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
  SDValue RetVals[] = { Undef, Ret };
  return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
}
1852223017Sdim
1853198090Srdivacky/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
1854198090Srdivacky/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
  // Examine each user of the node.  Conservatively return false as soon as
  // any user is not provably insensitive to SF/OF.
  for (SDNode::use_iterator UI = N->use_begin(),
         UE = N->use_end(); UI != UE; ++UI) {
    // Only examine CopyToReg uses.
    if (UI->getOpcode() != ISD::CopyToReg)
      return false;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
          X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the opcode of the user.  The whitelist below is the unsigned
      // and equality conditions (A/AE/B/BE/E/NE/P/NP), which read only
      // CF/ZF/PF -- never SF or OF.
      switch (FlagUI->getMachineOpcode()) {
      // These comparisons don't treat the most significant bit specially.
      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
      case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
      case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
      case X86::CMOVA16rr: case X86::CMOVA16rm:
      case X86::CMOVA32rr: case X86::CMOVA32rm:
      case X86::CMOVA64rr: case X86::CMOVA64rm:
      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
      case X86::CMOVB16rr: case X86::CMOVB16rm:
      case X86::CMOVB32rr: case X86::CMOVB32rm:
      case X86::CMOVB64rr: case X86::CMOVB64rm:
      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
      case X86::CMOVE16rr: case X86::CMOVE16rm:
      case X86::CMOVE32rr: case X86::CMOVE32rm:
      case X86::CMOVE64rr: case X86::CMOVE64rm:
      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
      case X86::CMOVP16rr: case X86::CMOVP16rm:
      case X86::CMOVP32rr: case X86::CMOVP32rm:
      case X86::CMOVP64rr: case X86::CMOVP64rm:
        continue;
      // Anything else: assume conservatively.
      default: return false;
      }
    }
  }
  // All users proven safe.
  return true;
}
1914198090Srdivacky
1915235633Sdim/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
1916235633Sdim/// is suitable for doing the {load; increment or decrement; store} to modify
1917235633Sdim/// transformation.
1918245431Sdimstatic bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
1919235633Sdim                                SDValue StoredVal, SelectionDAG *CurDAG,
1920235633Sdim                                LoadSDNode* &LoadNode, SDValue &InputChain) {
1921235633Sdim
1922235633Sdim  // is the value stored the result of a DEC or INC?
1923235633Sdim  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
1924235633Sdim
1925235633Sdim  // is the stored value result 0 of the load?
1926235633Sdim  if (StoredVal.getResNo() != 0) return false;
1927235633Sdim
1928235633Sdim  // are there other uses of the loaded value than the inc or dec?
1929235633Sdim  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
1930235633Sdim
1931235633Sdim  // is the store non-extending and non-indexed?
1932235633Sdim  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
1933235633Sdim    return false;
1934235633Sdim
1935235633Sdim  SDValue Load = StoredVal->getOperand(0);
1936235633Sdim  // Is the stored value a non-extending and non-indexed load?
1937235633Sdim  if (!ISD::isNormalLoad(Load.getNode())) return false;
1938235633Sdim
1939235633Sdim  // Return LoadNode by reference.
1940235633Sdim  LoadNode = cast<LoadSDNode>(Load);
1941235633Sdim  // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
1942245431Sdim  EVT LdVT = LoadNode->getMemoryVT();
1943245431Sdim  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
1944235633Sdim      LdVT != MVT::i8)
1945235633Sdim    return false;
1946235633Sdim
1947235633Sdim  // Is store the only read of the loaded value?
1948235633Sdim  if (!Load.hasOneUse())
1949235633Sdim    return false;
1950245431Sdim
1951235633Sdim  // Is the address of the store the same as the load?
1952235633Sdim  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
1953235633Sdim      LoadNode->getOffset() != StoreNode->getOffset())
1954235633Sdim    return false;
1955235633Sdim
1956235633Sdim  // Check if the chain is produced by the load or is a TokenFactor with
1957235633Sdim  // the load output chain as an operand. Return InputChain by reference.
1958235633Sdim  SDValue Chain = StoreNode->getChain();
1959235633Sdim
1960235633Sdim  bool ChainCheck = false;
1961235633Sdim  if (Chain == Load.getValue(1)) {
1962235633Sdim    ChainCheck = true;
1963235633Sdim    InputChain = LoadNode->getChain();
1964235633Sdim  } else if (Chain.getOpcode() == ISD::TokenFactor) {
1965235633Sdim    SmallVector<SDValue, 4> ChainOps;
1966235633Sdim    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
1967235633Sdim      SDValue Op = Chain.getOperand(i);
1968235633Sdim      if (Op == Load.getValue(1)) {
1969235633Sdim        ChainCheck = true;
1970235633Sdim        continue;
1971235633Sdim      }
1972245431Sdim
1973245431Sdim      // Make sure using Op as part of the chain would not cause a cycle here.
1974245431Sdim      // In theory, we could check whether the chain node is a predecessor of
1975245431Sdim      // the load. But that can be very expensive. Instead visit the uses and
1976245431Sdim      // make sure they all have smaller node id than the load.
1977245431Sdim      int LoadId = LoadNode->getNodeId();
1978245431Sdim      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
1979245431Sdim             UE = UI->use_end(); UI != UE; ++UI) {
1980245431Sdim        if (UI.getUse().getResNo() != 0)
1981245431Sdim          continue;
1982245431Sdim        if (UI->getNodeId() > LoadId)
1983245431Sdim          return false;
1984245431Sdim      }
1985245431Sdim
1986235633Sdim      ChainOps.push_back(Op);
1987235633Sdim    }
1988235633Sdim
1989235633Sdim    if (ChainCheck)
1990235633Sdim      // Make a new TokenFactor with all the other input chains except
1991235633Sdim      // for the load.
1992263509Sdim      InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
1993235633Sdim                                   MVT::Other, &ChainOps[0], ChainOps.size());
1994235633Sdim  }
1995235633Sdim  if (!ChainCheck)
1996235633Sdim    return false;
1997235633Sdim
1998235633Sdim  return true;
1999235633Sdim}
2000235633Sdim
2001235633Sdim/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
2002235633Sdim/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
2003235633Sdimstatic unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
2004235633Sdim  if (Opc == X86ISD::DEC) {
2005235633Sdim    if (LdVT == MVT::i64) return X86::DEC64m;
2006235633Sdim    if (LdVT == MVT::i32) return X86::DEC32m;
2007235633Sdim    if (LdVT == MVT::i16) return X86::DEC16m;
2008235633Sdim    if (LdVT == MVT::i8)  return X86::DEC8m;
2009235633Sdim  } else {
2010235633Sdim    assert(Opc == X86ISD::INC && "unrecognized opcode");
2011235633Sdim    if (LdVT == MVT::i64) return X86::INC64m;
2012235633Sdim    if (LdVT == MVT::i32) return X86::INC32m;
2013235633Sdim    if (LdVT == MVT::i16) return X86::INC16m;
2014235633Sdim    if (LdVT == MVT::i8)  return X86::INC8m;
2015235633Sdim  }
2016235633Sdim  llvm_unreachable("unrecognized size for LdVT");
2017235633Sdim}
2018235633Sdim
2019245431Sdim/// SelectGather - Customized ISel for GATHER operations.
2020245431Sdim///
2021245431SdimSDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
2022245431Sdim  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
2023245431Sdim  SDValue Chain = Node->getOperand(0);
2024245431Sdim  SDValue VSrc = Node->getOperand(2);
2025245431Sdim  SDValue Base = Node->getOperand(3);
2026245431Sdim  SDValue VIdx = Node->getOperand(4);
2027245431Sdim  SDValue VMask = Node->getOperand(5);
2028245431Sdim  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
2029245431Sdim  if (!Scale)
2030245431Sdim    return 0;
2031245431Sdim
2032245431Sdim  SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
2033245431Sdim                                   MVT::Other);
2034245431Sdim
2035245431Sdim  // Memory Operands: Base, Scale, Index, Disp, Segment
2036245431Sdim  SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
2037245431Sdim  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
2038245431Sdim  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
2039245431Sdim                          Disp, Segment, VMask, Chain};
2040263509Sdim  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops);
2041245431Sdim  // Node has 2 outputs: VDst and MVT::Other.
2042245431Sdim  // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
2043245431Sdim  // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
2044245431Sdim  // of ResNode.
2045245431Sdim  ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
2046245431Sdim  ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
2047245431Sdim  return ResNode;
2048245431Sdim}
2049245431Sdim
2050202375SrdivackySDNode *X86DAGToDAGISel::Select(SDNode *Node) {
2051263509Sdim  MVT NVT = Node->getSimpleValueType(0);
2052193323Sed  unsigned Opc, MOpc;
2053193323Sed  unsigned Opcode = Node->getOpcode();
2054263509Sdim  SDLoc dl(Node);
2055245431Sdim
2056204642Srdivacky  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
2057193323Sed
2058193323Sed  if (Node->isMachineOpcode()) {
2059204642Srdivacky    DEBUG(dbgs() << "== ";  Node->dump(CurDAG); dbgs() << '\n');
2060255946Sdim    Node->setNodeId(-1);
2061193323Sed    return NULL;   // Already selected.
2062193323Sed  }
2063193323Sed
2064193323Sed  switch (Opcode) {
2065198090Srdivacky  default: break;
2066245431Sdim  case ISD::INTRINSIC_W_CHAIN: {
2067245431Sdim    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2068245431Sdim    switch (IntNo) {
2069245431Sdim    default: break;
2070245431Sdim    case Intrinsic::x86_avx2_gather_d_pd:
2071245431Sdim    case Intrinsic::x86_avx2_gather_d_pd_256:
2072245431Sdim    case Intrinsic::x86_avx2_gather_q_pd:
2073245431Sdim    case Intrinsic::x86_avx2_gather_q_pd_256:
2074245431Sdim    case Intrinsic::x86_avx2_gather_d_ps:
2075245431Sdim    case Intrinsic::x86_avx2_gather_d_ps_256:
2076245431Sdim    case Intrinsic::x86_avx2_gather_q_ps:
2077245431Sdim    case Intrinsic::x86_avx2_gather_q_ps_256:
2078245431Sdim    case Intrinsic::x86_avx2_gather_d_q:
2079245431Sdim    case Intrinsic::x86_avx2_gather_d_q_256:
2080245431Sdim    case Intrinsic::x86_avx2_gather_q_q:
2081245431Sdim    case Intrinsic::x86_avx2_gather_q_q_256:
2082245431Sdim    case Intrinsic::x86_avx2_gather_d_d:
2083245431Sdim    case Intrinsic::x86_avx2_gather_d_d_256:
2084245431Sdim    case Intrinsic::x86_avx2_gather_q_d:
2085245431Sdim    case Intrinsic::x86_avx2_gather_q_d_256: {
2086263509Sdim      if (!Subtarget->hasAVX2())
2087263509Sdim        break;
2088245431Sdim      unsigned Opc;
2089245431Sdim      switch (IntNo) {
2090245431Sdim      default: llvm_unreachable("Impossible intrinsic");
2091245431Sdim      case Intrinsic::x86_avx2_gather_d_pd:     Opc = X86::VGATHERDPDrm;  break;
2092245431Sdim      case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
2093245431Sdim      case Intrinsic::x86_avx2_gather_q_pd:     Opc = X86::VGATHERQPDrm;  break;
2094245431Sdim      case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
2095245431Sdim      case Intrinsic::x86_avx2_gather_d_ps:     Opc = X86::VGATHERDPSrm;  break;
2096245431Sdim      case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
2097245431Sdim      case Intrinsic::x86_avx2_gather_q_ps:     Opc = X86::VGATHERQPSrm;  break;
2098245431Sdim      case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
2099245431Sdim      case Intrinsic::x86_avx2_gather_d_q:      Opc = X86::VPGATHERDQrm;  break;
2100245431Sdim      case Intrinsic::x86_avx2_gather_d_q_256:  Opc = X86::VPGATHERDQYrm; break;
2101245431Sdim      case Intrinsic::x86_avx2_gather_q_q:      Opc = X86::VPGATHERQQrm;  break;
2102245431Sdim      case Intrinsic::x86_avx2_gather_q_q_256:  Opc = X86::VPGATHERQQYrm; break;
2103245431Sdim      case Intrinsic::x86_avx2_gather_d_d:      Opc = X86::VPGATHERDDrm;  break;
2104245431Sdim      case Intrinsic::x86_avx2_gather_d_d_256:  Opc = X86::VPGATHERDDYrm; break;
2105245431Sdim      case Intrinsic::x86_avx2_gather_q_d:      Opc = X86::VPGATHERQDrm;  break;
2106245431Sdim      case Intrinsic::x86_avx2_gather_q_d_256:  Opc = X86::VPGATHERQDYrm; break;
2107245431Sdim      }
2108245431Sdim      SDNode *RetVal = SelectGather(Node, Opc);
2109245431Sdim      if (RetVal)
2110245431Sdim        // We already called ReplaceUses inside SelectGather.
2111245431Sdim        return NULL;
2112245431Sdim      break;
2113245431Sdim    }
2114245431Sdim    }
2115245431Sdim    break;
2116245431Sdim  }
2117198090Srdivacky  case X86ISD::GlobalBaseReg:
2118198090Srdivacky    return getGlobalBaseReg();
2119193323Sed
2120245431Sdim
2121198090Srdivacky  case X86ISD::ATOMOR64_DAG:
2122198090Srdivacky  case X86ISD::ATOMXOR64_DAG:
2123198090Srdivacky  case X86ISD::ATOMADD64_DAG:
2124198090Srdivacky  case X86ISD::ATOMSUB64_DAG:
2125198090Srdivacky  case X86ISD::ATOMNAND64_DAG:
2126198090Srdivacky  case X86ISD::ATOMAND64_DAG:
2127245431Sdim  case X86ISD::ATOMMAX64_DAG:
2128245431Sdim  case X86ISD::ATOMMIN64_DAG:
2129245431Sdim  case X86ISD::ATOMUMAX64_DAG:
2130245431Sdim  case X86ISD::ATOMUMIN64_DAG:
2131245431Sdim  case X86ISD::ATOMSWAP64_DAG: {
2132245431Sdim    unsigned Opc;
2133245431Sdim    switch (Opcode) {
2134245431Sdim    default: llvm_unreachable("Impossible opcode");
2135245431Sdim    case X86ISD::ATOMOR64_DAG:   Opc = X86::ATOMOR6432;   break;
2136245431Sdim    case X86ISD::ATOMXOR64_DAG:  Opc = X86::ATOMXOR6432;  break;
2137245431Sdim    case X86ISD::ATOMADD64_DAG:  Opc = X86::ATOMADD6432;  break;
2138245431Sdim    case X86ISD::ATOMSUB64_DAG:  Opc = X86::ATOMSUB6432;  break;
2139245431Sdim    case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
2140245431Sdim    case X86ISD::ATOMAND64_DAG:  Opc = X86::ATOMAND6432;  break;
2141245431Sdim    case X86ISD::ATOMMAX64_DAG:  Opc = X86::ATOMMAX6432;  break;
2142245431Sdim    case X86ISD::ATOMMIN64_DAG:  Opc = X86::ATOMMIN6432;  break;
2143245431Sdim    case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
2144245431Sdim    case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
2145245431Sdim    case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
2146245431Sdim    }
2147245431Sdim    SDNode *RetVal = SelectAtomic64(Node, Opc);
2148198090Srdivacky    if (RetVal)
2149198090Srdivacky      return RetVal;
2150198090Srdivacky    break;
2151198090Srdivacky  }
2152245431Sdim
2153223017Sdim  case ISD::ATOMIC_LOAD_XOR:
2154223017Sdim  case ISD::ATOMIC_LOAD_AND:
2155245431Sdim  case ISD::ATOMIC_LOAD_OR:
2156245431Sdim  case ISD::ATOMIC_LOAD_ADD: {
2157223017Sdim    SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
2158223017Sdim    if (RetVal)
2159223017Sdim      return RetVal;
2160223017Sdim    break;
2161223017Sdim  }
2162221345Sdim  case ISD::AND:
2163221345Sdim  case ISD::OR:
2164221345Sdim  case ISD::XOR: {
2165221345Sdim    // For operations of the form (x << C1) op C2, check if we can use a smaller
2166221345Sdim    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2167221345Sdim    SDValue N0 = Node->getOperand(0);
2168221345Sdim    SDValue N1 = Node->getOperand(1);
2169221345Sdim
2170221345Sdim    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2171221345Sdim      break;
2172221345Sdim
2173221345Sdim    // i8 is unshrinkable, i16 should be promoted to i32.
2174221345Sdim    if (NVT != MVT::i32 && NVT != MVT::i64)
2175221345Sdim      break;
2176221345Sdim
2177221345Sdim    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2178221345Sdim    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2179221345Sdim    if (!Cst || !ShlCst)
2180221345Sdim      break;
2181221345Sdim
2182221345Sdim    int64_t Val = Cst->getSExtValue();
2183221345Sdim    uint64_t ShlVal = ShlCst->getZExtValue();
2184221345Sdim
2185221345Sdim    // Make sure that we don't change the operation by removing bits.
2186221345Sdim    // This only matters for OR and XOR, AND is unaffected.
2187245431Sdim    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2188245431Sdim    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2189221345Sdim      break;
2190221345Sdim
2191245431Sdim    unsigned ShlOp, Op;
2192263509Sdim    MVT CstVT = NVT;
2193221345Sdim
2194221345Sdim    // Check the minimum bitwidth for the new constant.
2195221345Sdim    // TODO: AND32ri is the same as AND64ri32 with zext imm.
2196221345Sdim    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2197221345Sdim    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2198221345Sdim    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2199221345Sdim      CstVT = MVT::i8;
2200221345Sdim    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2201221345Sdim      CstVT = MVT::i32;
2202221345Sdim
2203221345Sdim    // Bail if there is no smaller encoding.
2204221345Sdim    if (NVT == CstVT)
2205221345Sdim      break;
2206221345Sdim
2207263509Sdim    switch (NVT.SimpleTy) {
2208221345Sdim    default: llvm_unreachable("Unsupported VT!");
2209221345Sdim    case MVT::i32:
2210221345Sdim      assert(CstVT == MVT::i8);
2211221345Sdim      ShlOp = X86::SHL32ri;
2212221345Sdim
2213221345Sdim      switch (Opcode) {
2214245431Sdim      default: llvm_unreachable("Impossible opcode");
2215221345Sdim      case ISD::AND: Op = X86::AND32ri8; break;
2216221345Sdim      case ISD::OR:  Op =  X86::OR32ri8; break;
2217221345Sdim      case ISD::XOR: Op = X86::XOR32ri8; break;
2218221345Sdim      }
2219221345Sdim      break;
2220221345Sdim    case MVT::i64:
2221221345Sdim      assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2222221345Sdim      ShlOp = X86::SHL64ri;
2223221345Sdim
2224221345Sdim      switch (Opcode) {
2225245431Sdim      default: llvm_unreachable("Impossible opcode");
2226221345Sdim      case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2227221345Sdim      case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
2228221345Sdim      case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2229221345Sdim      }
2230221345Sdim      break;
2231221345Sdim    }
2232221345Sdim
2233221345Sdim    // Emit the smaller op and the shift.
2234221345Sdim    SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
2235221345Sdim    SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2236221345Sdim    return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2237221345Sdim                                getI8Imm(ShlVal));
2238221345Sdim  }
2239218893Sdim  case X86ISD::UMUL: {
2240218893Sdim    SDValue N0 = Node->getOperand(0);
2241218893Sdim    SDValue N1 = Node->getOperand(1);
2242245431Sdim
2243218893Sdim    unsigned LoReg;
2244263509Sdim    switch (NVT.SimpleTy) {
2245218893Sdim    default: llvm_unreachable("Unsupported VT!");
2246218893Sdim    case MVT::i8:  LoReg = X86::AL;  Opc = X86::MUL8r; break;
2247218893Sdim    case MVT::i16: LoReg = X86::AX;  Opc = X86::MUL16r; break;
2248218893Sdim    case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2249218893Sdim    case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2250218893Sdim    }
2251245431Sdim
2252218893Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2253218893Sdim                                          N0, SDValue()).getValue(1);
2254245431Sdim
2255218893Sdim    SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2256218893Sdim    SDValue Ops[] = {N1, InFlag};
2257252723Sdim    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2258245431Sdim
2259218893Sdim    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
2260218893Sdim    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
2261218893Sdim    ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
2262218893Sdim    return NULL;
2263218893Sdim  }
2264245431Sdim
2265198090Srdivacky  case ISD::SMUL_LOHI:
2266198090Srdivacky  case ISD::UMUL_LOHI: {
2267198090Srdivacky    SDValue N0 = Node->getOperand(0);
2268198090Srdivacky    SDValue N1 = Node->getOperand(1);
2269193323Sed
2270198090Srdivacky    bool isSigned = Opcode == ISD::SMUL_LOHI;
2271245431Sdim    bool hasBMI2 = Subtarget->hasBMI2();
2272198090Srdivacky    if (!isSigned) {
2273263509Sdim      switch (NVT.SimpleTy) {
2274198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2275198090Srdivacky      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
2276198090Srdivacky      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2277245431Sdim      case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2278245431Sdim                     MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2279245431Sdim      case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2280245431Sdim                     MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2281193323Sed      }
2282198090Srdivacky    } else {
2283263509Sdim      switch (NVT.SimpleTy) {
2284198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2285198090Srdivacky      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
2286198090Srdivacky      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2287198090Srdivacky      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2288198090Srdivacky      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2289193323Sed      }
2290198090Srdivacky    }
2291193323Sed
2292245431Sdim    unsigned SrcReg, LoReg, HiReg;
2293245431Sdim    switch (Opc) {
2294245431Sdim    default: llvm_unreachable("Unknown MUL opcode!");
2295245431Sdim    case X86::IMUL8r:
2296245431Sdim    case X86::MUL8r:
2297245431Sdim      SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2298245431Sdim      break;
2299245431Sdim    case X86::IMUL16r:
2300245431Sdim    case X86::MUL16r:
2301245431Sdim      SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2302245431Sdim      break;
2303245431Sdim    case X86::IMUL32r:
2304245431Sdim    case X86::MUL32r:
2305245431Sdim      SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2306245431Sdim      break;
2307245431Sdim    case X86::IMUL64r:
2308245431Sdim    case X86::MUL64r:
2309245431Sdim      SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2310245431Sdim      break;
2311245431Sdim    case X86::MULX32rr:
2312245431Sdim      SrcReg = X86::EDX; LoReg = HiReg = 0;
2313245431Sdim      break;
2314245431Sdim    case X86::MULX64rr:
2315245431Sdim      SrcReg = X86::RDX; LoReg = HiReg = 0;
2316245431Sdim      break;
2317198090Srdivacky    }
2318193323Sed
2319198090Srdivacky    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2320202375Srdivacky    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2321198090Srdivacky    // Multiply is commmutative.
2322198090Srdivacky    if (!foldedLoad) {
2323202375Srdivacky      foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2324198090Srdivacky      if (foldedLoad)
2325198090Srdivacky        std::swap(N0, N1);
2326198090Srdivacky    }
2327193323Sed
2328245431Sdim    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2329245431Sdim                                          N0, SDValue()).getValue(1);
2330245431Sdim    SDValue ResHi, ResLo;
2331198090Srdivacky
2332198090Srdivacky    if (foldedLoad) {
2333245431Sdim      SDValue Chain;
2334198090Srdivacky      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2335198090Srdivacky                        InFlag };
2336245431Sdim      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2337245431Sdim        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2338252723Sdim        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2339245431Sdim        ResHi = SDValue(CNode, 0);
2340245431Sdim        ResLo = SDValue(CNode, 1);
2341245431Sdim        Chain = SDValue(CNode, 2);
2342245431Sdim        InFlag = SDValue(CNode, 3);
2343245431Sdim      } else {
2344245431Sdim        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2345252723Sdim        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2346245431Sdim        Chain = SDValue(CNode, 0);
2347245431Sdim        InFlag = SDValue(CNode, 1);
2348245431Sdim      }
2349218893Sdim
2350198090Srdivacky      // Update the chain.
2351245431Sdim      ReplaceUses(N1.getValue(1), Chain);
2352198090Srdivacky    } else {
2353245431Sdim      SDValue Ops[] = { N1, InFlag };
2354245431Sdim      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2355245431Sdim        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2356252723Sdim        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2357245431Sdim        ResHi = SDValue(CNode, 0);
2358245431Sdim        ResLo = SDValue(CNode, 1);
2359245431Sdim        InFlag = SDValue(CNode, 2);
2360245431Sdim      } else {
2361245431Sdim        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2362252723Sdim        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2363245431Sdim        InFlag = SDValue(CNode, 0);
2364245431Sdim      }
2365198090Srdivacky    }
2366198090Srdivacky
2367210299Sed    // Prevent use of AH in a REX instruction by referencing AX instead.
2368210299Sed    if (HiReg == X86::AH && Subtarget->is64Bit() &&
2369210299Sed        !SDValue(Node, 1).use_empty()) {
2370210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2371210299Sed                                              X86::AX, MVT::i16, InFlag);
2372210299Sed      InFlag = Result.getValue(2);
2373210299Sed      // Get the low part if needed. Don't use getCopyFromReg for aliasing
2374210299Sed      // registers.
2375210299Sed      if (!SDValue(Node, 0).use_empty())
2376210299Sed        ReplaceUses(SDValue(Node, 1),
2377210299Sed          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2378210299Sed
2379210299Sed      // Shift AX down 8 bits.
2380210299Sed      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2381210299Sed                                              Result,
2382210299Sed                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
2383210299Sed      // Then truncate it down to i8.
2384210299Sed      ReplaceUses(SDValue(Node, 1),
2385210299Sed        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2386210299Sed    }
2387198090Srdivacky    // Copy the low half of the result, if it is needed.
2388202375Srdivacky    if (!SDValue(Node, 0).use_empty()) {
2389245431Sdim      if (ResLo.getNode() == 0) {
2390245431Sdim        assert(LoReg && "Register for low half is not defined!");
2391245431Sdim        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2392245431Sdim                                       InFlag);
2393245431Sdim        InFlag = ResLo.getValue(2);
2394245431Sdim      }
2395245431Sdim      ReplaceUses(SDValue(Node, 0), ResLo);
2396245431Sdim      DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2397198090Srdivacky    }
2398198090Srdivacky    // Copy the high half of the result, if it is needed.
2399202375Srdivacky    if (!SDValue(Node, 1).use_empty()) {
2400245431Sdim      if (ResHi.getNode() == 0) {
2401245431Sdim        assert(HiReg && "Register for high half is not defined!");
2402245431Sdim        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2403245431Sdim                                       InFlag);
2404245431Sdim        InFlag = ResHi.getValue(2);
2405245431Sdim      }
2406245431Sdim      ReplaceUses(SDValue(Node, 1), ResHi);
2407245431Sdim      DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2408198090Srdivacky    }
2409245431Sdim
2410198090Srdivacky    return NULL;
2411198090Srdivacky  }
2412193323Sed
2413198090Srdivacky  case ISD::SDIVREM:
2414198090Srdivacky  case ISD::UDIVREM: {
2415198090Srdivacky    SDValue N0 = Node->getOperand(0);
2416198090Srdivacky    SDValue N1 = Node->getOperand(1);
2417193323Sed
2418198090Srdivacky    bool isSigned = Opcode == ISD::SDIVREM;
2419198090Srdivacky    if (!isSigned) {
2420263509Sdim      switch (NVT.SimpleTy) {
2421198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2422198090Srdivacky      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
2423198090Srdivacky      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2424198090Srdivacky      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2425198090Srdivacky      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2426193323Sed      }
2427198090Srdivacky    } else {
2428263509Sdim      switch (NVT.SimpleTy) {
2429198090Srdivacky      default: llvm_unreachable("Unsupported VT!");
2430198090Srdivacky      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
2431198090Srdivacky      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2432198090Srdivacky      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2433198090Srdivacky      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2434198090Srdivacky      }
2435198090Srdivacky    }
2436193323Sed
2437201360Srdivacky    unsigned LoReg, HiReg, ClrReg;
2438263509Sdim    unsigned SExtOpcode;
2439263509Sdim    switch (NVT.SimpleTy) {
2440198090Srdivacky    default: llvm_unreachable("Unsupported VT!");
2441198090Srdivacky    case MVT::i8:
2442201360Srdivacky      LoReg = X86::AL;  ClrReg = HiReg = X86::AH;
2443198090Srdivacky      SExtOpcode = X86::CBW;
2444198090Srdivacky      break;
2445198090Srdivacky    case MVT::i16:
2446198090Srdivacky      LoReg = X86::AX;  HiReg = X86::DX;
2447263509Sdim      ClrReg = X86::DX;
2448198090Srdivacky      SExtOpcode = X86::CWD;
2449198090Srdivacky      break;
2450198090Srdivacky    case MVT::i32:
2451201360Srdivacky      LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2452198090Srdivacky      SExtOpcode = X86::CDQ;
2453198090Srdivacky      break;
2454198090Srdivacky    case MVT::i64:
2455201360Srdivacky      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2456198090Srdivacky      SExtOpcode = X86::CQO;
2457198090Srdivacky      break;
2458198090Srdivacky    }
2459193323Sed
2460198090Srdivacky    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2461202375Srdivacky    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2462198090Srdivacky    bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2463198090Srdivacky
2464198090Srdivacky    SDValue InFlag;
2465198090Srdivacky    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2466198090Srdivacky      // Special case for div8, just use a move with zero extension to AX to
2467198090Srdivacky      // clear the upper 8 bits (AH).
2468198090Srdivacky      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2469202375Srdivacky      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2470198090Srdivacky        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2471198090Srdivacky        Move =
2472223017Sdim          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2473252723Sdim                                         MVT::Other, Ops), 0);
2474198090Srdivacky        Chain = Move.getValue(1);
2475198090Srdivacky        ReplaceUses(N0.getValue(1), Chain);
2476193323Sed      } else {
2477198090Srdivacky        Move =
2478223017Sdim          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2479198090Srdivacky        Chain = CurDAG->getEntryNode();
2480198090Srdivacky      }
2481223017Sdim      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2482198090Srdivacky      InFlag = Chain.getValue(1);
2483198090Srdivacky    } else {
2484198090Srdivacky      InFlag =
2485198090Srdivacky        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2486198090Srdivacky                             LoReg, N0, SDValue()).getValue(1);
2487198090Srdivacky      if (isSigned && !signBitIsZero) {
2488198090Srdivacky        // Sign extend the low part into the high part.
2489193323Sed        InFlag =
2490218893Sdim          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2491198090Srdivacky      } else {
2492198090Srdivacky        // Zero out the high part, effectively zero extending the input.
2493263509Sdim        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2494263509Sdim        switch (NVT.SimpleTy) {
2495263509Sdim        case MVT::i16:
2496263509Sdim          ClrNode =
2497263509Sdim              SDValue(CurDAG->getMachineNode(
2498263509Sdim                          TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2499263509Sdim                          CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)),
2500263509Sdim                      0);
2501263509Sdim          break;
2502263509Sdim        case MVT::i32:
2503263509Sdim          break;
2504263509Sdim        case MVT::i64:
2505263509Sdim          ClrNode =
2506263509Sdim              SDValue(CurDAG->getMachineNode(
2507263509Sdim                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2508263509Sdim                          CurDAG->getTargetConstant(0, MVT::i64), ClrNode,
2509263509Sdim                          CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
2510263509Sdim                      0);
2511263509Sdim          break;
2512263509Sdim        default:
2513263509Sdim          llvm_unreachable("Unexpected division source");
2514263509Sdim        }
2515263509Sdim
2516201360Srdivacky        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2517198090Srdivacky                                      ClrNode, InFlag).getValue(1);
2518193323Sed      }
2519198090Srdivacky    }
2520193323Sed
2521198090Srdivacky    if (foldedLoad) {
2522198090Srdivacky      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2523198090Srdivacky                        InFlag };
2524198090Srdivacky      SDNode *CNode =
2525252723Sdim        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2526198090Srdivacky      InFlag = SDValue(CNode, 1);
2527198090Srdivacky      // Update the chain.
2528198090Srdivacky      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2529198090Srdivacky    } else {
2530198090Srdivacky      InFlag =
2531218893Sdim        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2532198090Srdivacky    }
2533198090Srdivacky
2534210299Sed    // Prevent use of AH in a REX instruction by referencing AX instead.
2535210299Sed    // Shift it down 8 bits.
2536263509Sdim    //
2537263509Sdim    // The current assumption of the register allocator is that isel
2538263509Sdim    // won't generate explicit references to the GPR8_NOREX registers. If
2539263509Sdim    // the allocator and/or the backend get enhanced to be more robust in
2540263509Sdim    // that regard, this can be, and should be, removed.
2541210299Sed    if (HiReg == X86::AH && Subtarget->is64Bit() &&
2542210299Sed        !SDValue(Node, 1).use_empty()) {
2543210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2544210299Sed                                              X86::AX, MVT::i16, InFlag);
2545210299Sed      InFlag = Result.getValue(2);
2546210299Sed
2547210299Sed      // If we also need AL (the quotient), get it by extracting a subreg from
2548210299Sed      // Result. The fast register allocator does not like multiple CopyFromReg
2549210299Sed      // nodes using aliasing registers.
2550210299Sed      if (!SDValue(Node, 0).use_empty())
2551210299Sed        ReplaceUses(SDValue(Node, 0),
2552210299Sed          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2553210299Sed
2554210299Sed      // Shift AX right by 8 bits instead of using AH.
2555210299Sed      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2556210299Sed                                         Result,
2557210299Sed                                         CurDAG->getTargetConstant(8, MVT::i8)),
2558210299Sed                       0);
2559210299Sed      ReplaceUses(SDValue(Node, 1),
2560210299Sed        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2561210299Sed    }
2562198090Srdivacky    // Copy the division (low) result, if it is needed.
2563202375Srdivacky    if (!SDValue(Node, 0).use_empty()) {
2564198090Srdivacky      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2565198090Srdivacky                                                LoReg, NVT, InFlag);
2566198090Srdivacky      InFlag = Result.getValue(2);
2567202375Srdivacky      ReplaceUses(SDValue(Node, 0), Result);
2568204642Srdivacky      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2569198090Srdivacky    }
2570198090Srdivacky    // Copy the remainder (high) result, if it is needed.
2571202375Srdivacky    if (!SDValue(Node, 1).use_empty()) {
2572210299Sed      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2573210299Sed                                              HiReg, NVT, InFlag);
2574210299Sed      InFlag = Result.getValue(2);
2575202375Srdivacky      ReplaceUses(SDValue(Node, 1), Result);
2576204642Srdivacky      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2577198090Srdivacky    }
2578198090Srdivacky    return NULL;
2579198090Srdivacky  }
2580193323Sed
2581245431Sdim  case X86ISD::CMP:
2582245431Sdim  case X86ISD::SUB: {
2583245431Sdim    // Sometimes a SUB is used to perform comparison.
2584245431Sdim    if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2585245431Sdim      // This node is not a CMP.
2586245431Sdim      break;
2587198090Srdivacky    SDValue N0 = Node->getOperand(0);
2588198090Srdivacky    SDValue N1 = Node->getOperand(1);
2589198090Srdivacky
2590198090Srdivacky    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2591198090Srdivacky    // use a smaller encoding.
2592212904Sdim    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2593212904Sdim        HasNoSignedComparisonUses(Node))
2594207618Srdivacky      // Look past the truncate if CMP is the only use of it.
2595207618Srdivacky      N0 = N0.getOperand(0);
2596235633Sdim    if ((N0.getNode()->getOpcode() == ISD::AND ||
2597235633Sdim         (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
2598235633Sdim        N0.getNode()->hasOneUse() &&
2599198090Srdivacky        N0.getValueType() != MVT::i8 &&
2600198090Srdivacky        X86::isZeroNode(N1)) {
2601198090Srdivacky      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
2602198090Srdivacky      if (!C) break;
2603198090Srdivacky
2604198090Srdivacky      // For example, convert "testl %eax, $8" to "testb %al, $8"
2605198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
2606198090Srdivacky          (!(C->getZExtValue() & 0x80) ||
2607198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2608198090Srdivacky        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
2609198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2610198090Srdivacky
2611198090Srdivacky        // On x86-32, only the ABCD registers have 8-bit subregisters.
2612198090Srdivacky        if (!Subtarget->is64Bit()) {
2613235633Sdim          const TargetRegisterClass *TRC;
2614263509Sdim          switch (N0.getSimpleValueType().SimpleTy) {
2615198090Srdivacky          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2616198090Srdivacky          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2617198090Srdivacky          default: llvm_unreachable("Unsupported TEST operand type!");
2618198090Srdivacky          }
2619198090Srdivacky          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
2620198090Srdivacky          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2621198090Srdivacky                                               Reg.getValueType(), Reg, RC), 0);
2622198090Srdivacky        }
2623198090Srdivacky
2624198090Srdivacky        // Extract the l-register.
2625208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2626198090Srdivacky                                                        MVT::i8, Reg);
2627198090Srdivacky
2628198090Srdivacky        // Emit a testb.
2629245431Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2630245431Sdim                                                 Subreg, Imm);
2631245431Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2632245431Sdim        // one, do not call ReplaceAllUsesWith.
2633245431Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2634245431Sdim                    SDValue(NewNode, 0));
2635245431Sdim        return NULL;
2636193323Sed      }
2637198090Srdivacky
2638198090Srdivacky      // For example, "testl %eax, $2048" to "testb %ah, $8".
2639198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
2640198090Srdivacky          (!(C->getZExtValue() & 0x8000) ||
2641198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2642198090Srdivacky        // Shift the immediate right by 8 bits.
2643198090Srdivacky        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
2644198090Srdivacky                                                       MVT::i8);
2645198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2646198090Srdivacky
2647198090Srdivacky        // Put the value in an ABCD register.
2648235633Sdim        const TargetRegisterClass *TRC;
2649263509Sdim        switch (N0.getSimpleValueType().SimpleTy) {
2650198090Srdivacky        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
2651198090Srdivacky        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2652198090Srdivacky        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2653198090Srdivacky        default: llvm_unreachable("Unsupported TEST operand type!");
2654198090Srdivacky        }
2655198090Srdivacky        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
2656198090Srdivacky        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2657198090Srdivacky                                             Reg.getValueType(), Reg, RC), 0);
2658198090Srdivacky
2659198090Srdivacky        // Extract the h-register.
2660208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
2661198090Srdivacky                                                        MVT::i8, Reg);
2662198090Srdivacky
2663226890Sdim        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
2664226890Sdim        // target GR8_NOREX registers, so make sure the register class is
2665226890Sdim        // forced.
2666245431Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
2667245431Sdim                                                 MVT::i32, Subreg, ShiftedImm);
2668245431Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2669245431Sdim        // one, do not call ReplaceAllUsesWith.
2670245431Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2671245431Sdim                    SDValue(NewNode, 0));
2672245431Sdim        return NULL;
2673193323Sed      }
2674198090Srdivacky
2675198090Srdivacky      // For example, "testl %eax, $32776" to "testw %ax, $32776".
2676198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
2677198090Srdivacky          N0.getValueType() != MVT::i16 &&
2678198090Srdivacky          (!(C->getZExtValue() & 0x8000) ||
2679198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2680198090Srdivacky        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
2681198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2682198090Srdivacky
2683198090Srdivacky        // Extract the 16-bit subregister.
2684208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
2685198090Srdivacky                                                        MVT::i16, Reg);
2686198090Srdivacky
2687198090Srdivacky        // Emit a testw.
2688245431Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
2689245431Sdim                                                 Subreg, Imm);
2690245431Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2691245431Sdim        // one, do not call ReplaceAllUsesWith.
2692245431Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2693245431Sdim                    SDValue(NewNode, 0));
2694245431Sdim        return NULL;
2695193323Sed      }
2696198090Srdivacky
2697198090Srdivacky      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
2698198090Srdivacky      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
2699198090Srdivacky          N0.getValueType() == MVT::i64 &&
2700198090Srdivacky          (!(C->getZExtValue() & 0x80000000) ||
2701198090Srdivacky           HasNoSignedComparisonUses(Node))) {
2702198090Srdivacky        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
2703198090Srdivacky        SDValue Reg = N0.getNode()->getOperand(0);
2704198090Srdivacky
2705198090Srdivacky        // Extract the 32-bit subregister.
2706208599Srdivacky        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
2707198090Srdivacky                                                        MVT::i32, Reg);
2708198090Srdivacky
2709198090Srdivacky        // Emit a testl.
2710245431Sdim        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
2711245431Sdim                                                 Subreg, Imm);
2712245431Sdim        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2713245431Sdim        // one, do not call ReplaceAllUsesWith.
2714245431Sdim        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2715245431Sdim                    SDValue(NewNode, 0));
2716245431Sdim        return NULL;
2717198090Srdivacky      }
2718193323Sed    }
2719198090Srdivacky    break;
2720193323Sed  }
2721235633Sdim  case ISD::STORE: {
2722235633Sdim    // Change a chain of {load; incr or dec; store} of the same value into
2723235633Sdim    // a simple increment or decrement through memory of that value, if the
2724235633Sdim    // uses of the modified value and its address are suitable.
2725235633Sdim    // The DEC64m tablegen pattern is currently not able to match the case where
2726245431Sdim    // the EFLAGS on the original DEC are used. (This also applies to
2727235633Sdim    // {INC,DEC}X{64,32,16,8}.)
2728235633Sdim    // We'll need to improve tablegen to allow flags to be transferred from a
2729235633Sdim    // node in the pattern to the result node.  probably with a new keyword
2730235633Sdim    // for example, we have this
2731235633Sdim    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2732235633Sdim    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2733235633Sdim    //   (implicit EFLAGS)]>;
2734235633Sdim    // but maybe need something like this
2735235633Sdim    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2736235633Sdim    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2737235633Sdim    //   (transferrable EFLAGS)]>;
2738235633Sdim
2739235633Sdim    StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2740235633Sdim    SDValue StoredVal = StoreNode->getOperand(1);
2741235633Sdim    unsigned Opc = StoredVal->getOpcode();
2742235633Sdim
2743235633Sdim    LoadSDNode *LoadNode = 0;
2744235633Sdim    SDValue InputChain;
2745235633Sdim    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
2746235633Sdim                             LoadNode, InputChain))
2747235633Sdim      break;
2748235633Sdim
2749235633Sdim    SDValue Base, Scale, Index, Disp, Segment;
2750235633Sdim    if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
2751235633Sdim                    Base, Scale, Index, Disp, Segment))
2752235633Sdim      break;
2753235633Sdim
2754235633Sdim    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2755235633Sdim    MemOp[0] = StoreNode->getMemOperand();
2756235633Sdim    MemOp[1] = LoadNode->getMemOperand();
2757235633Sdim    const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
2758245431Sdim    EVT LdVT = LoadNode->getMemoryVT();
2759235633Sdim    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
2760235633Sdim    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
2761263509Sdim                                                   SDLoc(Node),
2762252723Sdim                                                   MVT::i32, MVT::Other, Ops);
2763235633Sdim    Result->setMemRefs(MemOp, MemOp + 2);
2764235633Sdim
2765235633Sdim    ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2766235633Sdim    ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2767235633Sdim
2768235633Sdim    return Result;
2769198090Srdivacky  }
2770235633Sdim  }
2771193323Sed
2772202375Srdivacky  SDNode *ResNode = SelectCode(Node);
2773193323Sed
2774204642Srdivacky  DEBUG(dbgs() << "=> ";
2775204642Srdivacky        if (ResNode == NULL || ResNode == Node)
2776204642Srdivacky          Node->dump(CurDAG);
2777204642Srdivacky        else
2778204642Srdivacky          ResNode->dump(CurDAG);
2779204642Srdivacky        dbgs() << '\n');
2780193323Sed
2781193323Sed  return ResNode;
2782193323Sed}
2783193323Sed
2784193323Sedbool X86DAGToDAGISel::
2785193323SedSelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
2786193323Sed                             std::vector<SDValue> &OutOps) {
2787193323Sed  SDValue Op0, Op1, Op2, Op3, Op4;
2788193323Sed  switch (ConstraintCode) {
2789193323Sed  case 'o':   // offsetable        ??
2790193323Sed  case 'v':   // not offsetable    ??
2791193323Sed  default: return true;
2792193323Sed  case 'm':   // memory
2793218893Sdim    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
2794193323Sed      return true;
2795193323Sed    break;
2796193323Sed  }
2797245431Sdim
2798193323Sed  OutOps.push_back(Op0);
2799193323Sed  OutOps.push_back(Op1);
2800193323Sed  OutOps.push_back(Op2);
2801193323Sed  OutOps.push_back(Op3);
2802193323Sed  OutOps.push_back(Op4);
2803193323Sed  return false;
2804193323Sed}
2805193323Sed
2806245431Sdim/// createX86ISelDag - This pass converts a legalized DAG into a
2807193323Sed/// X86-specific DAG, ready for instruction scheduling.
2808193323Sed///
2809193323SedFunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
2810235633Sdim                                     CodeGenOpt::Level OptLevel) {
2811193323Sed  return new X86DAGToDAGISel(TM, OptLevel);
2812193323Sed}
2813