1193323Sed//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file defines a DAG pattern matching instruction selector for X86, 11193323Sed// converting from a legalized dag to a X86 dag. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#define DEBUG_TYPE "x86-isel" 16193323Sed#include "X86.h" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86MachineFunctionInfo.h" 19193323Sed#include "X86RegisterInfo.h" 20193323Sed#include "X86Subtarget.h" 21193323Sed#include "X86TargetMachine.h" 22252723Sdim#include "llvm/ADT/Statistic.h" 23252723Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 24193323Sed#include "llvm/CodeGen/MachineFunction.h" 25193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 26193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 27193323Sed#include "llvm/CodeGen/SelectionDAGISel.h" 28252723Sdim#include "llvm/IR/Instructions.h" 29252723Sdim#include "llvm/IR/Intrinsics.h" 30252723Sdim#include "llvm/IR/Type.h" 31193323Sed#include "llvm/Support/Debug.h" 32198090Srdivacky#include "llvm/Support/ErrorHandling.h" 33193323Sed#include "llvm/Support/MathExtras.h" 34198090Srdivacky#include "llvm/Support/raw_ostream.h" 35252723Sdim#include "llvm/Target/TargetMachine.h" 36252723Sdim#include "llvm/Target/TargetOptions.h" 37193323Sedusing namespace llvm; 38193323Sed 39193323SedSTATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 40193323Sed 41193323Sed//===----------------------------------------------------------------------===// 42193323Sed// Pattern Matcher Implementation 
43193323Sed//===----------------------------------------------------------------------===// 44193323Sed 45193323Sednamespace { 46193323Sed /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 47193323Sed /// SDValue's instead of register numbers for the leaves of the matched 48193323Sed /// tree. 49193323Sed struct X86ISelAddressMode { 50193323Sed enum { 51193323Sed RegBase, 52193323Sed FrameIndexBase 53193323Sed } BaseType; 54193323Sed 55207618Srdivacky // This is really a union, discriminated by BaseType! 56207618Srdivacky SDValue Base_Reg; 57207618Srdivacky int Base_FrameIndex; 58193323Sed 59193323Sed unsigned Scale; 60245431Sdim SDValue IndexReg; 61193323Sed int32_t Disp; 62193323Sed SDValue Segment; 63207618Srdivacky const GlobalValue *GV; 64207618Srdivacky const Constant *CP; 65207618Srdivacky const BlockAddress *BlockAddr; 66193323Sed const char *ES; 67193323Sed int JT; 68193323Sed unsigned Align; // CP alignment. 69195098Sed unsigned char SymbolFlags; // X86II::MO_* 70193323Sed 71193323Sed X86ISelAddressMode() 72207618Srdivacky : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), 73198892Srdivacky Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), 74198090Srdivacky SymbolFlags(X86II::MO_NO_FLAG) { 75193323Sed } 76193323Sed 77193323Sed bool hasSymbolicDisplacement() const { 78198892Srdivacky return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; 79193323Sed } 80245431Sdim 81195098Sed bool hasBaseOrIndexReg() const { 82263509Sdim return BaseType == FrameIndexBase || 83263509Sdim IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; 84195098Sed } 85245431Sdim 86195098Sed /// isRIPRelative - Return true if this addressing mode is already RIP 87195098Sed /// relative. 
88195098Sed bool isRIPRelative() const { 89195098Sed if (BaseType != RegBase) return false; 90195098Sed if (RegisterSDNode *RegNode = 91207618Srdivacky dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) 92195098Sed return RegNode->getReg() == X86::RIP; 93195098Sed return false; 94195098Sed } 95245431Sdim 96195098Sed void setBaseReg(SDValue Reg) { 97195098Sed BaseType = RegBase; 98207618Srdivacky Base_Reg = Reg; 99195098Sed } 100193323Sed 101245431Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 102193323Sed void dump() { 103202375Srdivacky dbgs() << "X86ISelAddressMode " << this << '\n'; 104207618Srdivacky dbgs() << "Base_Reg "; 105207618Srdivacky if (Base_Reg.getNode() != 0) 106245431Sdim Base_Reg.getNode()->dump(); 107198090Srdivacky else 108202375Srdivacky dbgs() << "nul"; 109207618Srdivacky dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' 110198090Srdivacky << " Scale" << Scale << '\n' 111198090Srdivacky << "IndexReg "; 112198090Srdivacky if (IndexReg.getNode() != 0) 113198090Srdivacky IndexReg.getNode()->dump(); 114198090Srdivacky else 115245431Sdim dbgs() << "nul"; 116202375Srdivacky dbgs() << " Disp " << Disp << '\n' 117198090Srdivacky << "GV "; 118198090Srdivacky if (GV) 119198090Srdivacky GV->dump(); 120198090Srdivacky else 121202375Srdivacky dbgs() << "nul"; 122202375Srdivacky dbgs() << " CP "; 123198090Srdivacky if (CP) 124198090Srdivacky CP->dump(); 125198090Srdivacky else 126202375Srdivacky dbgs() << "nul"; 127202375Srdivacky dbgs() << '\n' 128198090Srdivacky << "ES "; 129198090Srdivacky if (ES) 130202375Srdivacky dbgs() << ES; 131198090Srdivacky else 132202375Srdivacky dbgs() << "nul"; 133202375Srdivacky dbgs() << " JT" << JT << " Align" << Align << '\n'; 134193323Sed } 135245431Sdim#endif 136193323Sed }; 137193323Sed} 138193323Sed 139193323Sednamespace { 140193323Sed //===--------------------------------------------------------------------===// 141193323Sed /// ISel - X86 specific code to select X86 machine instructions for 
142193323Sed /// SelectionDAG operations. 143193323Sed /// 144198892Srdivacky class X86DAGToDAGISel : public SelectionDAGISel { 145193323Sed /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 146193323Sed /// make the right decision when generating code for different targets. 147193323Sed const X86Subtarget *Subtarget; 148193323Sed 149193323Sed /// OptForSize - If true, selector should try to optimize for code size 150193323Sed /// instead of performance. 151193323Sed bool OptForSize; 152193323Sed 153193323Sed public: 154193323Sed explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) 155193323Sed : SelectionDAGISel(tm, OptLevel), 156193399Sed Subtarget(&tm.getSubtarget<X86Subtarget>()), 157193323Sed OptForSize(false) {} 158193323Sed 159193323Sed virtual const char *getPassName() const { 160193323Sed return "X86 DAG->DAG Instruction Selection"; 161193323Sed } 162193323Sed 163207618Srdivacky virtual void EmitFunctionEntryCode(); 164193323Sed 165203954Srdivacky virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; 166193323Sed 167204642Srdivacky virtual void PreprocessISelDAG(); 168203954Srdivacky 169212904Sdim inline bool immSext8(SDNode *N) const { 170212904Sdim return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue()); 171212904Sdim } 172212904Sdim 173212904Sdim // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit 174212904Sdim // sign extended field. 175212904Sdim inline bool i64immSExt32(SDNode *N) const { 176212904Sdim uint64_t v = cast<ConstantSDNode>(N)->getZExtValue(); 177212904Sdim return (int64_t)v == (int32_t)v; 178212904Sdim } 179212904Sdim 180193323Sed// Include the pieces autogenerated from the target description. 
181193323Sed#include "X86GenDAGISel.inc" 182193323Sed 183193323Sed private: 184202375Srdivacky SDNode *Select(SDNode *N); 185245431Sdim SDNode *SelectGather(SDNode *N, unsigned Opc); 186193323Sed SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); 187263509Sdim SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT); 188193323Sed 189224145Sdim bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); 190218893Sdim bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); 191193323Sed bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); 192198090Srdivacky bool MatchAddress(SDValue N, X86ISelAddressMode &AM); 193198090Srdivacky bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 194198090Srdivacky unsigned Depth); 195193323Sed bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); 196218893Sdim bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, 197193323Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 198193323Sed SDValue &Segment); 199263509Sdim bool SelectMOV64Imm32(SDValue N, SDValue &Imm); 200218893Sdim bool SelectLEAAddr(SDValue N, SDValue &Base, 201210299Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 202210299Sed SDValue &Segment); 203263509Sdim bool SelectLEA64_32Addr(SDValue N, SDValue &Base, 204263509Sdim SDValue &Scale, SDValue &Index, SDValue &Disp, 205263509Sdim SDValue &Segment); 206218893Sdim bool SelectTLSADDRAddr(SDValue N, SDValue &Base, 207210299Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 208210299Sed SDValue &Segment); 209204642Srdivacky bool SelectScalarSSELoad(SDNode *Root, SDValue N, 210204642Srdivacky SDValue &Base, SDValue &Scale, 211193323Sed SDValue &Index, SDValue &Disp, 212193323Sed SDValue &Segment, 213204642Srdivacky SDValue &NodeWithChain); 214245431Sdim 215202375Srdivacky bool TryFoldLoad(SDNode *P, SDValue N, 216193323Sed SDValue &Base, SDValue &Scale, 217193323Sed SDValue &Index, SDValue &Disp, 218193323Sed SDValue &Segment); 219245431Sdim 220193323Sed /// SelectInlineAsmMemoryOperand 
- Implement addressing mode selection for 221193323Sed /// inline asm expressions. 222193323Sed virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, 223193323Sed char ConstraintCode, 224193323Sed std::vector<SDValue> &OutOps); 225245431Sdim 226193323Sed void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 227193323Sed 228245431Sdim inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 229193323Sed SDValue &Scale, SDValue &Index, 230193323Sed SDValue &Disp, SDValue &Segment) { 231193323Sed Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 232263509Sdim CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, 233263509Sdim getTargetLowering()->getPointerTy()) : 234207618Srdivacky AM.Base_Reg; 235193323Sed Scale = getI8Imm(AM.Scale); 236193323Sed Index = AM.IndexReg; 237193323Sed // These are 32-bit even in 64-bit mode since RIP relative offset 238193323Sed // is 32-bit. 239193323Sed if (AM.GV) 240263509Sdim Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), 241210299Sed MVT::i32, AM.Disp, 242195098Sed AM.SymbolFlags); 243193323Sed else if (AM.CP) 244193323Sed Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 245195098Sed AM.Align, AM.Disp, AM.SymbolFlags); 246245431Sdim else if (AM.ES) { 247245431Sdim assert(!AM.Disp && "Non-zero displacement is ignored with ES."); 248195098Sed Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); 249245431Sdim } else if (AM.JT != -1) { 250245431Sdim assert(!AM.Disp && "Non-zero displacement is ignored with JT."); 251195098Sed Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); 252245431Sdim } else if (AM.BlockAddr) 253245431Sdim Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, 254245431Sdim AM.SymbolFlags); 255193323Sed else 256193323Sed Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); 257193323Sed 258193323Sed if (AM.Segment.getNode()) 259193323Sed Segment = AM.Segment; 260193323Sed else 261193323Sed Segment = 
CurDAG->getRegister(0, MVT::i32); 262193323Sed } 263193323Sed 264193323Sed /// getI8Imm - Return a target constant with the specified value, of type 265193323Sed /// i8. 266193323Sed inline SDValue getI8Imm(unsigned Imm) { 267193323Sed return CurDAG->getTargetConstant(Imm, MVT::i8); 268193323Sed } 269193323Sed 270193323Sed /// getI32Imm - Return a target constant with the specified value, of type 271193323Sed /// i32. 272193323Sed inline SDValue getI32Imm(unsigned Imm) { 273193323Sed return CurDAG->getTargetConstant(Imm, MVT::i32); 274193323Sed } 275193323Sed 276193323Sed /// getGlobalBaseReg - Return an SDNode that returns the value of 277193323Sed /// the global base register. Output instructions required to 278193323Sed /// initialize the global base register, if necessary. 279193323Sed /// 280193323Sed SDNode *getGlobalBaseReg(); 281193323Sed 282193399Sed /// getTargetMachine - Return a reference to the TargetMachine, casted 283193399Sed /// to the target-specific type. 284252723Sdim const X86TargetMachine &getTargetMachine() const { 285193399Sed return static_cast<const X86TargetMachine &>(TM); 286193399Sed } 287193399Sed 288193399Sed /// getInstrInfo - Return a reference to the TargetInstrInfo, casted 289193399Sed /// to the target-specific type. 290252723Sdim const X86InstrInfo *getInstrInfo() const { 291193399Sed return getTargetMachine().getInstrInfo(); 292193399Sed } 293193323Sed }; 294193323Sed} 295193323Sed 296193323Sed 297203954Srdivackybool 298203954SrdivackyX86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { 299193323Sed if (OptLevel == CodeGenOpt::None) return false; 300193323Sed 301203954Srdivacky if (!N.hasOneUse()) 302203954Srdivacky return false; 303203954Srdivacky 304203954Srdivacky if (N.getOpcode() != ISD::LOAD) 305203954Srdivacky return true; 306203954Srdivacky 307203954Srdivacky // If N is a load, do additional profitability checks. 
308203954Srdivacky if (U == Root) { 309193323Sed switch (U->getOpcode()) { 310193323Sed default: break; 311202375Srdivacky case X86ISD::ADD: 312202375Srdivacky case X86ISD::SUB: 313202375Srdivacky case X86ISD::AND: 314202375Srdivacky case X86ISD::XOR: 315202375Srdivacky case X86ISD::OR: 316193323Sed case ISD::ADD: 317193323Sed case ISD::ADDC: 318193323Sed case ISD::ADDE: 319193323Sed case ISD::AND: 320193323Sed case ISD::OR: 321193323Sed case ISD::XOR: { 322193323Sed SDValue Op1 = U->getOperand(1); 323193323Sed 324193323Sed // If the other operand is a 8-bit immediate we should fold the immediate 325193323Sed // instead. This reduces code size. 326193323Sed // e.g. 327193323Sed // movl 4(%esp), %eax 328193323Sed // addl $4, %eax 329193323Sed // vs. 330193323Sed // movl $4, %eax 331193323Sed // addl 4(%esp), %eax 332193323Sed // The former is 2 bytes shorter. In case where the increment is 1, then 333193323Sed // the saving can be 4 bytes (by using incl %eax). 334193323Sed if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) 335193323Sed if (Imm->getAPIntValue().isSignedIntN(8)) 336193323Sed return false; 337193323Sed 338193323Sed // If the other operand is a TLS address, we should fold it instead. 339193323Sed // This produces 340193323Sed // movl %gs:0, %eax 341193323Sed // leal i@NTPOFF(%eax), %eax 342193323Sed // instead of 343193323Sed // movl $i@NTPOFF, %eax 344193323Sed // addl %gs:0, %eax 345193323Sed // if the block also has an access to a second TLS address this will save 346193323Sed // a load. 347193323Sed // FIXME: This is probably also true for non TLS addresses. 
348193323Sed if (Op1.getOpcode() == X86ISD::Wrapper) { 349193323Sed SDValue Val = Op1.getOperand(0); 350193323Sed if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) 351193323Sed return false; 352193323Sed } 353193323Sed } 354193323Sed } 355203954Srdivacky } 356193323Sed 357203954Srdivacky return true; 358203954Srdivacky} 359203954Srdivacky 360205218Srdivacky/// MoveBelowCallOrigChain - Replace the original chain operand of the call with 361205218Srdivacky/// load's chain operand and move load below the call's chain operand. 362205218Srdivackystatic void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, 363245431Sdim SDValue Call, SDValue OrigChain) { 364193323Sed SmallVector<SDValue, 8> Ops; 365205218Srdivacky SDValue Chain = OrigChain.getOperand(0); 366193323Sed if (Chain.getNode() == Load.getNode()) 367193323Sed Ops.push_back(Load.getOperand(0)); 368193323Sed else { 369193323Sed assert(Chain.getOpcode() == ISD::TokenFactor && 370205218Srdivacky "Unexpected chain operand"); 371193323Sed for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) 372193323Sed if (Chain.getOperand(i).getNode() == Load.getNode()) 373193323Sed Ops.push_back(Load.getOperand(0)); 374193323Sed else 375193323Sed Ops.push_back(Chain.getOperand(i)); 376193323Sed SDValue NewChain = 377263509Sdim CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), 378193323Sed MVT::Other, &Ops[0], Ops.size()); 379193323Sed Ops.clear(); 380193323Sed Ops.push_back(NewChain); 381193323Sed } 382205218Srdivacky for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) 383205218Srdivacky Ops.push_back(OrigChain.getOperand(i)); 384210299Sed CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); 385210299Sed CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), 386193323Sed Load.getOperand(1), Load.getOperand(2)); 387245431Sdim 388245431Sdim unsigned NumOps = Call.getNode()->getNumOperands(); 389193323Sed Ops.clear(); 390193323Sed Ops.push_back(SDValue(Load.getNode(), 1)); 
391245431Sdim for (unsigned i = 1, e = NumOps; i != e; ++i) 392193323Sed Ops.push_back(Call.getOperand(i)); 393245431Sdim CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); 394193323Sed} 395193323Sed 396193323Sed/// isCalleeLoad - Return true if call address is a load and it can be 397193323Sed/// moved below CALLSEQ_START and the chains leading up to the call. 398193323Sed/// Return the CALLSEQ_START by reference as a second output. 399205218Srdivacky/// In the case of a tail call, there isn't a callseq node between the call 400205218Srdivacky/// chain and the load. 401205218Srdivackystatic bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { 402245431Sdim // The transformation is somewhat dangerous if the call's chain was glued to 403245431Sdim // the call. After MoveBelowOrigChain the load is moved between the call and 404245431Sdim // the chain, this can create a cycle if the load is not folded. So it is 405245431Sdim // *really* important that we are sure the load will be folded. 406193323Sed if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 407193323Sed return false; 408193323Sed LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 409193323Sed if (!LD || 410193323Sed LD->isVolatile() || 411193323Sed LD->getAddressingMode() != ISD::UNINDEXED || 412193323Sed LD->getExtensionType() != ISD::NON_EXTLOAD) 413193323Sed return false; 414193323Sed 415193323Sed // Now let's find the callseq_start. 416205218Srdivacky while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { 417193323Sed if (!Chain.hasOneUse()) 418193323Sed return false; 419193323Sed Chain = Chain.getOperand(0); 420193323Sed } 421205218Srdivacky 422205218Srdivacky if (!Chain.getNumOperands()) 423205218Srdivacky return false; 424252723Sdim // Since we are not checking for AA here, conservatively abort if the chain 425252723Sdim // writes to memory. It's not safe to move the callee (a load) across a store. 
426252723Sdim if (isa<MemSDNode>(Chain.getNode()) && 427252723Sdim cast<MemSDNode>(Chain.getNode())->writeMem()) 428252723Sdim return false; 429193323Sed if (Chain.getOperand(0).getNode() == Callee.getNode()) 430193323Sed return true; 431193323Sed if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && 432198090Srdivacky Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && 433198090Srdivacky Callee.getValue(1).hasOneUse()) 434193323Sed return true; 435193323Sed return false; 436193323Sed} 437193323Sed 438204642Srdivackyvoid X86DAGToDAGISel::PreprocessISelDAG() { 439204792Srdivacky // OptForSize is used in pattern predicates that isel is matching. 440252723Sdim OptForSize = MF->getFunction()->getAttributes(). 441252723Sdim hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); 442245431Sdim 443204642Srdivacky for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 444204642Srdivacky E = CurDAG->allnodes_end(); I != E; ) { 445204642Srdivacky SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 446193323Sed 447205218Srdivacky if (OptLevel != CodeGenOpt::None && 448252723Sdim // Only does this when target favors doesn't favor register indirect 449252723Sdim // call. 450252723Sdim ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || 451245431Sdim (N->getOpcode() == X86ISD::TC_RETURN && 452252723Sdim // Only does this if load can be folded into TC_RETURN. 453245431Sdim (Subtarget->is64Bit() || 454245431Sdim getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { 455193323Sed /// Also try moving call address load from outside callseq_start to just 456193323Sed /// before the call to allow it to be folded. 
457193323Sed /// 458193323Sed /// [Load chain] 459193323Sed /// ^ 460193323Sed /// | 461193323Sed /// [Load] 462193323Sed /// ^ ^ 463193323Sed /// | | 464193323Sed /// / \-- 465193323Sed /// / | 466193323Sed ///[CALLSEQ_START] | 467193323Sed /// ^ | 468193323Sed /// | | 469193323Sed /// [LOAD/C2Reg] | 470193323Sed /// | | 471193323Sed /// \ / 472193323Sed /// \ / 473193323Sed /// [CALL] 474205218Srdivacky bool HasCallSeq = N->getOpcode() == X86ISD::CALL; 475204642Srdivacky SDValue Chain = N->getOperand(0); 476204642Srdivacky SDValue Load = N->getOperand(1); 477205218Srdivacky if (!isCalleeLoad(Load, Chain, HasCallSeq)) 478193323Sed continue; 479205218Srdivacky MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); 480193323Sed ++NumLoadMoved; 481193323Sed continue; 482193323Sed } 483245431Sdim 484204642Srdivacky // Lower fpround and fpextend nodes that target the FP stack to be store and 485204642Srdivacky // load to the stack. This is a gross hack. We would like to simply mark 486204642Srdivacky // these as being illegal, but when we do that, legalize produces these when 487204642Srdivacky // it expands calls, then expands these in the same legalize pass. We would 488204642Srdivacky // like dag combine to be able to hack on these between the call expansion 489204642Srdivacky // and the node legalization. As such this pass basically does "really 490204642Srdivacky // late" legalization of these inline with the X86 isel pass. 491204642Srdivacky // FIXME: This should only happen when not compiled with -O0. 492193323Sed if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) 493193323Sed continue; 494245431Sdim 495263509Sdim MVT SrcVT = N->getOperand(0).getSimpleValueType(); 496263509Sdim MVT DstVT = N->getSimpleValueType(0); 497226890Sdim 498226890Sdim // If any of the sources are vectors, no fp stack involved. 
499226890Sdim if (SrcVT.isVector() || DstVT.isVector()) 500226890Sdim continue; 501226890Sdim 502193323Sed // If the source and destination are SSE registers, then this is a legal 503193323Sed // conversion that should not be lowered. 504263509Sdim const X86TargetLowering *X86Lowering = 505263509Sdim static_cast<const X86TargetLowering *>(getTargetLowering()); 506263509Sdim bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); 507263509Sdim bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); 508193323Sed if (SrcIsSSE && DstIsSSE) 509193323Sed continue; 510193323Sed 511193323Sed if (!SrcIsSSE && !DstIsSSE) { 512193323Sed // If this is an FPStack extension, it is a noop. 513193323Sed if (N->getOpcode() == ISD::FP_EXTEND) 514193323Sed continue; 515193323Sed // If this is a value-preserving FPStack truncation, it is a noop. 516193323Sed if (N->getConstantOperandVal(1)) 517193323Sed continue; 518193323Sed } 519245431Sdim 520193323Sed // Here we could have an FP stack truncation or an FPStack <-> SSE convert. 521193323Sed // FPStack has extload and truncstore. SSE can fold direct loads into other 522193323Sed // operations. Based on this, decide what we want to do. 523263509Sdim MVT MemVT; 524193323Sed if (N->getOpcode() == ISD::FP_ROUND) 525193323Sed MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 526193323Sed else 527193323Sed MemVT = SrcIsSSE ? SrcVT : DstVT; 528245431Sdim 529193323Sed SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); 530263509Sdim SDLoc dl(N); 531245431Sdim 532193323Sed // FIXME: optimize the case where the src/dest is a load or store? 
533193323Sed SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, 534193323Sed N->getOperand(0), 535218893Sdim MemTmp, MachinePointerInfo(), MemVT, 536203954Srdivacky false, false, 0); 537218893Sdim SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, 538218893Sdim MachinePointerInfo(), 539218893Sdim MemVT, false, false, 0); 540193323Sed 541193323Sed // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 542193323Sed // extload we created. This will cause general havok on the dag because 543193323Sed // anything below the conversion could be folded into other existing nodes. 544193323Sed // To avoid invalidating 'I', back it up to the convert node. 545193323Sed --I; 546193323Sed CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 547245431Sdim 548193323Sed // Now that we did that, the node is dead. Increment the iterator to the 549193323Sed // next node to process, then delete N. 550193323Sed ++I; 551193323Sed CurDAG->DeleteNode(N); 552245431Sdim } 553193323Sed} 554193323Sed 555193323Sed 556193323Sed/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 557193323Sed/// the main function. 558193323Sedvoid X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 559193323Sed MachineFrameInfo *MFI) { 560193323Sed const TargetInstrInfo *TII = TM.getInstrInfo(); 561218893Sdim if (Subtarget->isTargetCygMing()) { 562218893Sdim unsigned CallOp = 563235633Sdim Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; 564206124Srdivacky BuildMI(BB, DebugLoc(), 565218893Sdim TII->get(CallOp)).addExternalSymbol("__main"); 566218893Sdim } 567193323Sed} 568193323Sed 569207618Srdivackyvoid X86DAGToDAGISel::EmitFunctionEntryCode() { 570193323Sed // If this is main, emit special code for main. 
571207618Srdivacky if (const Function *Fn = MF->getFunction()) 572207618Srdivacky if (Fn->hasExternalLinkage() && Fn->getName() == "main") 573207618Srdivacky EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); 574193323Sed} 575193323Sed 576224145Sdimstatic bool isDispSafeForFrameIndex(int64_t Val) { 577224145Sdim // On 64-bit platforms, we can run into an issue where a frame index 578224145Sdim // includes a displacement that, when added to the explicit displacement, 579224145Sdim // will overflow the displacement field. Assuming that the frame index 580224145Sdim // displacement fits into a 31-bit integer (which is only slightly more 581224145Sdim // aggressive than the current fundamental assumption that it fits into 582224145Sdim // a 32-bit integer), a 31-bit disp should always be safe. 583224145Sdim return isInt<31>(Val); 584224145Sdim} 585193323Sed 586224145Sdimbool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, 587224145Sdim X86ISelAddressMode &AM) { 588224145Sdim int64_t Val = AM.Disp + Offset; 589224145Sdim CodeModel::Model M = TM.getCodeModel(); 590224145Sdim if (Subtarget->is64Bit()) { 591224145Sdim if (!X86::isOffsetSuitableForCodeModel(Val, M, 592224145Sdim AM.hasSymbolicDisplacement())) 593224145Sdim return true; 594224145Sdim // In addition to the checks required for a register base, check that 595224145Sdim // we do not try to use an unsafe Disp with a frame index. 596224145Sdim if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && 597224145Sdim !isDispSafeForFrameIndex(Val)) 598224145Sdim return true; 599224145Sdim } 600224145Sdim AM.Disp = Val; 601224145Sdim return false; 602224145Sdim 603224145Sdim} 604224145Sdim 605218893Sdimbool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ 606218893Sdim SDValue Address = N->getOperand(1); 607245431Sdim 608218893Sdim // load gs:0 -> GS segment register. 609218893Sdim // load fs:0 -> FS segment register. 
610218893Sdim // 611193323Sed // This optimization is valid because the GNU TLS model defines that 612193323Sed // gs:0 (or fs:0 on X86-64) contains its own address. 613193323Sed // For more information see http://people.redhat.com/drepper/tls.pdf 614218893Sdim if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) 615218893Sdim if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && 616245431Sdim Subtarget->isTargetLinux()) 617218893Sdim switch (N->getPointerInfo().getAddrSpace()) { 618218893Sdim case 256: 619218893Sdim AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); 620218893Sdim return false; 621218893Sdim case 257: 622218893Sdim AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); 623218893Sdim return false; 624218893Sdim } 625245431Sdim 626193323Sed return true; 627193323Sed} 628193323Sed 629195098Sed/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes 630195098Sed/// into an addressing mode. These wrap things that will resolve down into a 631195098Sed/// symbol reference. If no match is possible, this returns true, otherwise it 632198090Srdivacky/// returns false. 633193323Sedbool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { 634195098Sed // If the addressing mode already has a symbol as the displacement, we can 635195098Sed // never match another symbol. 636193323Sed if (AM.hasSymbolicDisplacement()) 637193323Sed return true; 638193323Sed 639193323Sed SDValue N0 = N.getOperand(0); 640198090Srdivacky CodeModel::Model M = TM.getCodeModel(); 641198090Srdivacky 642195098Sed // Handle X86-64 rip-relative addresses. We check this before checking direct 643195098Sed // folding because RIP is preferable to non-RIP accesses. 644235633Sdim if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP && 645195098Sed // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 646195098Sed // they cannot be folded into immediate fields. 
647195098Sed // FIXME: This can be improved for kernel and other models? 648235633Sdim (M == CodeModel::Small || M == CodeModel::Kernel)) { 649235633Sdim // Base and index reg must be 0 in order to use %rip as base. 650235633Sdim if (AM.hasBaseOrIndexReg()) 651235633Sdim return true; 652195098Sed if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 653224145Sdim X86ISelAddressMode Backup = AM; 654195098Sed AM.GV = G->getGlobal(); 655195098Sed AM.SymbolFlags = G->getTargetFlags(); 656224145Sdim if (FoldOffsetIntoAddress(G->getOffset(), AM)) { 657224145Sdim AM = Backup; 658224145Sdim return true; 659224145Sdim } 660195098Sed } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 661224145Sdim X86ISelAddressMode Backup = AM; 662195098Sed AM.CP = CP->getConstVal(); 663195098Sed AM.Align = CP->getAlignment(); 664195098Sed AM.SymbolFlags = CP->getTargetFlags(); 665224145Sdim if (FoldOffsetIntoAddress(CP->getOffset(), AM)) { 666224145Sdim AM = Backup; 667224145Sdim return true; 668224145Sdim } 669195098Sed } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 670195098Sed AM.ES = S->getSymbol(); 671195098Sed AM.SymbolFlags = S->getTargetFlags(); 672198892Srdivacky } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 673195098Sed AM.JT = J->getIndex(); 674195098Sed AM.SymbolFlags = J->getTargetFlags(); 675245431Sdim } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { 676245431Sdim X86ISelAddressMode Backup = AM; 677245431Sdim AM.BlockAddr = BA->getBlockAddress(); 678245431Sdim AM.SymbolFlags = BA->getTargetFlags(); 679245431Sdim if (FoldOffsetIntoAddress(BA->getOffset(), AM)) { 680245431Sdim AM = Backup; 681245431Sdim return true; 682245431Sdim } 683245431Sdim } else 684245431Sdim llvm_unreachable("Unhandled symbol reference node."); 685198090Srdivacky 686195098Sed if (N.getOpcode() == X86ISD::WrapperRIP) 687195098Sed AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 688195098Sed 
return false; 689195098Sed } 690195098Sed 691195098Sed // Handle the case when globals fit in our immediate field: This is true for 692235633Sdim // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit 693235633Sdim // mode, this only applies to a non-RIP-relative computation. 694195098Sed if (!Subtarget->is64Bit() || 695235633Sdim M == CodeModel::Small || M == CodeModel::Kernel) { 696235633Sdim assert(N.getOpcode() != X86ISD::WrapperRIP && 697235633Sdim "RIP-relative addressing already handled"); 698195098Sed if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 699195098Sed AM.GV = G->getGlobal(); 700195098Sed AM.Disp += G->getOffset(); 701195098Sed AM.SymbolFlags = G->getTargetFlags(); 702195098Sed } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 703193323Sed AM.CP = CP->getConstVal(); 704193323Sed AM.Align = CP->getAlignment(); 705195098Sed AM.Disp += CP->getOffset(); 706195098Sed AM.SymbolFlags = CP->getTargetFlags(); 707195098Sed } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 708195098Sed AM.ES = S->getSymbol(); 709195098Sed AM.SymbolFlags = S->getTargetFlags(); 710198892Srdivacky } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 711195098Sed AM.JT = J->getIndex(); 712195098Sed AM.SymbolFlags = J->getTargetFlags(); 713245431Sdim } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { 714245431Sdim AM.BlockAddr = BA->getBlockAddress(); 715245431Sdim AM.Disp += BA->getOffset(); 716245431Sdim AM.SymbolFlags = BA->getTargetFlags(); 717245431Sdim } else 718245431Sdim llvm_unreachable("Unhandled symbol reference node."); 719193323Sed return false; 720193323Sed } 721193323Sed 722193323Sed return true; 723193323Sed} 724193323Sed 725193323Sed/// MatchAddress - Add the specified node to the specified addressing mode, 726193323Sed/// returning true if it cannot be done. This just pattern matches for the 727193323Sed/// addressing mode. 
728198090Srdivackybool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { 729210299Sed if (MatchAddressRecursively(N, AM, 0)) 730198090Srdivacky return true; 731198090Srdivacky 732198090Srdivacky // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has 733198090Srdivacky // a smaller encoding and avoids a scaled-index. 734198090Srdivacky if (AM.Scale == 2 && 735198090Srdivacky AM.BaseType == X86ISelAddressMode::RegBase && 736207618Srdivacky AM.Base_Reg.getNode() == 0) { 737207618Srdivacky AM.Base_Reg = AM.IndexReg; 738198090Srdivacky AM.Scale = 1; 739198090Srdivacky } 740198090Srdivacky 741198090Srdivacky // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, 742198090Srdivacky // because it has a smaller encoding. 743198090Srdivacky // TODO: Which other code models can use this? 744198090Srdivacky if (TM.getCodeModel() == CodeModel::Small && 745198090Srdivacky Subtarget->is64Bit() && 746198090Srdivacky AM.Scale == 1 && 747198090Srdivacky AM.BaseType == X86ISelAddressMode::RegBase && 748207618Srdivacky AM.Base_Reg.getNode() == 0 && 749198090Srdivacky AM.IndexReg.getNode() == 0 && 750198090Srdivacky AM.SymbolFlags == X86II::MO_NO_FLAG && 751198090Srdivacky AM.hasSymbolicDisplacement()) 752207618Srdivacky AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); 753198090Srdivacky 754198090Srdivacky return false; 755198090Srdivacky} 756198090Srdivacky 757235633Sdim// Insert a node into the DAG at least before the Pos node's position. This 758235633Sdim// will reposition the node as needed, and will assign it a node ID that is <= 759235633Sdim// the Pos node's ID. Note that this does *not* preserve the uniqueness of node 760235633Sdim// IDs! The selection DAG must no longer depend on their uniqueness when this 761235633Sdim// is used. 
static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  // Only reposition N if it has no ID yet (-1) or currently sits after Pos in
  // the topological order; otherwise it is already in a valid position.
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
// allows us to convert the shift and and into an h-register extract and
// a scaled index. Returns false if the simplification is performed.
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  // Only a single-use SRL by a constant can match this pattern.
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  // ScaleLog is the number of low bits that will be re-introduced as an
  // addressing-mode scale: it must be 1, 2, or 3, and the mask must be
  // exactly 0xff << ScaleLog so the result is a byte extract plus scale.
  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, Eight);
  InsertDAGNode(DAG, N, Srl);
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, And);
  InsertDAGNode(DAG, N, ShlCount);
  InsertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  // The AND becomes the index register, and the low bits removed by the
  // original shift are expressed as the addressing-mode scale.
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  // Only 1, 2 or 3 can be expressed as an x86 addressing-mode scale.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, NewAnd);
  InsertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  // NOTE(review): AMShiftAmt is unsigned, so "<= 0" only excludes zero here.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a continuous run of bits.
  if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    InsertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewSRLAmt);
  InsertDAGNode(DAG, N, NewSRL);
  InsertDAGNode(DAG, N, NewSHLAmt);
  InsertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

/// MatchAddressRecursively - Recursive worker for MatchAddress.  Tries to
/// fold N into the addressing mode AM, returning true if it cannot be done.
/// Depth bounds the recursion; beyond 5 levels we fall back to
/// MatchAddressBase.
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    // NOTE(review): only the jump-table case is rejected by this guard;
    // an ExternalSymbol falls through despite the FIXME above -- confirm
    // that this is the intended behavior.
    if (!AM.ES && AM.JT != -1) return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!FoldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    // A frame index can only serve as the base, and on x86-64 only if the
    // accumulated displacement is still safe to combine with it.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == 0 &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!FoldOffsetIntoAddress(Disp, AM))
            return false;
        }

        // If the offset could not be folded, fall back to using the whole
        // shifted value as the (scaled) index.
        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == 0 &&
        AM.IndexReg.getNode() == 0) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            // If the scaled offset cannot be folded, fall back to using the
            // whole multiplied operand as base and index.
            if (FoldOffsetIntoAddress(Disp, AM))
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if a has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    InsertDAGNode(*CurDAG, N, Zero);
    InsertDAGNode(*CurDAG, N, Neg);
    return false;
  }

  case ISD::ADD: {
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // First try folding both operands into the address, LHS then RHS.
    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;

    // Try again after commuting the operands.
    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base_Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      N = Handle.getValue();
      AM.Base_Reg = N.getOperand(0);
      AM.IndexReg = N.getOperand(1);
      AM.Scale = 1;
      return false;
    }
    // Re-fetch N through the handle in case recursion CSE'd it away.
    N = Handle.getValue();
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (CurDAG->isBaseWithConstantOffset(N)) {
      X86ISelAddressMode Backup = AM;
      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));

      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
        return false;
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.getNode() == 0) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched.  It
/// is always a load, store, atomic node, or null.  It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  // Note the inverted conventions: Match* helpers return true on *failure*,
  // while the Select* hooks return true on *success*.
  if (MatchAddress(N, AM))
    return false;

  // Fill any unused base/index slots with the zero register so the five
  // address operands are always fully populated.
  MVT VT = N.getSimpleValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternChainNode: this is the matched node that has a chain input and
///   output.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  // Case 1: (scalar_to_vector (load ...)) -- fold the load directly when it
  // is profitable and legal to do so.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}


/// SelectMOV64Imm32 - Match an operand of MOV32ri64: either an immediate
/// whose value fits zero-extended in 32 bits, or (in small-code-model static
/// codegen) a target symbol whose address 'movl' can materialize directly.
bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();
    // Reject values whose upper 32 bits are not zero.
    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
    return true;
  }

  // In static codegen with small code model, we can get the address of a label
  // into a register with 'movl'. TableGen has already made sure we're looking
  // at a label of some kind.
  assert(N->getOpcode() == X86ISD::Wrapper &&
         "Unexpected node type for MOV32ri64");
  N = N.getOperand(0);

  if (N->getOpcode() != ISD::TargetConstantPool &&
      N->getOpcode() != ISD::TargetJumpTable &&
      N->getOpcode() != ISD::TargetGlobalAddress &&
      N->getOpcode() != ISD::TargetExternalSymbol &&
      N->getOpcode() != ISD::TargetBlockAddress)
    return false;

  Imm = N;
  return TM.getCodeModel() == CodeModel::Small;
}

/// SelectLEA64_32Addr - Match an address for a 64-bit LEA that computes a
/// 32-bit value: the 32-bit base and index registers (when present) are
/// widened to 64 bits with SUBREG_TO_REG so the LEA operands are well-typed.
bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
    return false;

  SDLoc DL(N);
  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
  if (RN && RN->getReg() == 0)
    // The "no base" placeholder must be re-created at the wider type.
    Base = CurDAG->getRegister(0, MVT::i64);
  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
    // Base could already be %rip, particularly in the x32 ABI.
    Base = SDValue(CurDAG->getMachineNode(
                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
                       CurDAG->getTargetConstant(0, MVT::i64),
                       Base,
                       CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
                   0);
  }

  RN = dyn_cast<RegisterSDNode>(Index);
  if (RN && RN->getReg() == 0)
    Index = CurDAG->getRegister(0, MVT::i64);
  else {
    assert(Index.getValueType() == MVT::i32 &&
           "Expect to be extending 32-bit registers for use in LEA");
    Index = SDValue(CurDAG->getMachineNode(
                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
                        CurDAG->getTargetConstant(0, MVT::i64),
                        Index,
                        CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
                    0);
  }

  return true;
}

/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
1461193323Sed SDValue Copy = AM.Segment; 1462193323Sed SDValue T = CurDAG->getRegister(0, MVT::i32); 1463193323Sed AM.Segment = T; 1464193323Sed if (MatchAddress(N, AM)) 1465193323Sed return false; 1466193323Sed assert (T == AM.Segment); 1467193323Sed AM.Segment = Copy; 1468193323Sed 1469263509Sdim MVT VT = N.getSimpleValueType(); 1470193323Sed unsigned Complexity = 0; 1471193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase) 1472207618Srdivacky if (AM.Base_Reg.getNode()) 1473193323Sed Complexity = 1; 1474193323Sed else 1475207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, VT); 1476193323Sed else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1477193323Sed Complexity = 4; 1478193323Sed 1479193323Sed if (AM.IndexReg.getNode()) 1480193323Sed Complexity++; 1481193323Sed else 1482193323Sed AM.IndexReg = CurDAG->getRegister(0, VT); 1483193323Sed 1484193323Sed // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1485193323Sed // a simple shift. 1486193323Sed if (AM.Scale > 1) 1487193323Sed Complexity++; 1488193323Sed 1489193323Sed // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1490193323Sed // to a LEA. This is determined with some expermentation but is by no means 1491193323Sed // optimal (especially for code size consideration). LEA is nice because of 1492193323Sed // its three-address nature. Tweak the cost function again when we can run 1493193323Sed // convertToThreeAddress() at register allocation time. 1494193323Sed if (AM.hasSymbolicDisplacement()) { 1495193323Sed // For X86-64, we should always use lea to materialize RIP relative 1496193323Sed // addresses. 1497193323Sed if (Subtarget->is64Bit()) 1498193323Sed Complexity = 4; 1499193323Sed else 1500193323Sed Complexity += 2; 1501193323Sed } 1502193323Sed 1503207618Srdivacky if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode())) 1504193323Sed Complexity++; 1505193323Sed 1506198090Srdivacky // If it isn't worth using an LEA, reject it. 
1507198090Srdivacky if (Complexity <= 2) 1508198090Srdivacky return false; 1509245431Sdim 1510198090Srdivacky getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1511198090Srdivacky return true; 1512193323Sed} 1513193323Sed 1514194612Sed/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 1515218893Sdimbool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, 1516194612Sed SDValue &Scale, SDValue &Index, 1517210299Sed SDValue &Disp, SDValue &Segment) { 1518194612Sed assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1519194612Sed const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1520245431Sdim 1521194612Sed X86ISelAddressMode AM; 1522194612Sed AM.GV = GA->getGlobal(); 1523194612Sed AM.Disp += GA->getOffset(); 1524207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); 1525195098Sed AM.SymbolFlags = GA->getTargetFlags(); 1526195098Sed 1527194612Sed if (N.getValueType() == MVT::i32) { 1528194612Sed AM.Scale = 1; 1529194612Sed AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1530194612Sed } else { 1531194612Sed AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1532194612Sed } 1533245431Sdim 1534194612Sed getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1535194612Sed return true; 1536194612Sed} 1537194612Sed 1538194612Sed 1539202375Srdivackybool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, 1540193323Sed SDValue &Base, SDValue &Scale, 1541193323Sed SDValue &Index, SDValue &Disp, 1542193323Sed SDValue &Segment) { 1543204642Srdivacky if (!ISD::isNON_EXTLoad(N.getNode()) || 1544204642Srdivacky !IsProfitableToFold(N, P, P) || 1545207618Srdivacky !IsLegalToFold(N, P, P, OptLevel)) 1546204642Srdivacky return false; 1547245431Sdim 1548218893Sdim return SelectAddr(N.getNode(), 1549218893Sdim N.getOperand(1), Base, Scale, Index, Disp, Segment); 1550193323Sed} 1551193323Sed 1552193323Sed/// getGlobalBaseReg - Return an SDNode that returns the value of 1553193323Sed/// the global base register. 
Output instructions required to 1554193323Sed/// initialize the global base register, if necessary. 1555193323Sed/// 1556193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1557193399Sed unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 1558263509Sdim return CurDAG->getRegister(GlobalBaseReg, 1559263509Sdim getTargetLowering()->getPointerTy()).getNode(); 1560193323Sed} 1561193323Sed 1562193323SedSDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { 1563193323Sed SDValue Chain = Node->getOperand(0); 1564193323Sed SDValue In1 = Node->getOperand(1); 1565193323Sed SDValue In2L = Node->getOperand(2); 1566193323Sed SDValue In2H = Node->getOperand(3); 1567245431Sdim 1568193323Sed SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1569218893Sdim if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1570193323Sed return NULL; 1571198090Srdivacky MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1572198090Srdivacky MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1573198090Srdivacky const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; 1574263509Sdim SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), 1575252723Sdim MVT::i32, MVT::i32, MVT::Other, Ops); 1576198090Srdivacky cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); 1577198090Srdivacky return ResNode; 1578193323Sed} 1579193323Sed 1580245431Sdim/// Atomic opcode table 1581245431Sdim/// 1582223017Sdimenum AtomicOpc { 1583245431Sdim ADD, 1584245431Sdim SUB, 1585245431Sdim INC, 1586245431Sdim DEC, 1587223017Sdim OR, 1588223017Sdim AND, 1589223017Sdim XOR, 1590223017Sdim AtomicOpcEnd 1591223017Sdim}; 1592223017Sdim 1593223017Sdimenum AtomicSz { 1594223017Sdim ConstantI8, 1595223017Sdim I8, 1596223017Sdim SextConstantI16, 1597223017Sdim ConstantI16, 1598223017Sdim I16, 1599223017Sdim SextConstantI32, 1600223017Sdim ConstantI32, 1601223017Sdim I32, 1602223017Sdim SextConstantI64, 1603223017Sdim ConstantI64, 1604223017Sdim I64, 1605223017Sdim 
AtomicSzEnd 1606223017Sdim}; 1607223017Sdim 1608235633Sdimstatic const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { 1609223017Sdim { 1610245431Sdim X86::LOCK_ADD8mi, 1611245431Sdim X86::LOCK_ADD8mr, 1612245431Sdim X86::LOCK_ADD16mi8, 1613245431Sdim X86::LOCK_ADD16mi, 1614245431Sdim X86::LOCK_ADD16mr, 1615245431Sdim X86::LOCK_ADD32mi8, 1616245431Sdim X86::LOCK_ADD32mi, 1617245431Sdim X86::LOCK_ADD32mr, 1618245431Sdim X86::LOCK_ADD64mi8, 1619245431Sdim X86::LOCK_ADD64mi32, 1620245431Sdim X86::LOCK_ADD64mr, 1621245431Sdim }, 1622245431Sdim { 1623245431Sdim X86::LOCK_SUB8mi, 1624245431Sdim X86::LOCK_SUB8mr, 1625245431Sdim X86::LOCK_SUB16mi8, 1626245431Sdim X86::LOCK_SUB16mi, 1627245431Sdim X86::LOCK_SUB16mr, 1628245431Sdim X86::LOCK_SUB32mi8, 1629245431Sdim X86::LOCK_SUB32mi, 1630245431Sdim X86::LOCK_SUB32mr, 1631245431Sdim X86::LOCK_SUB64mi8, 1632245431Sdim X86::LOCK_SUB64mi32, 1633245431Sdim X86::LOCK_SUB64mr, 1634245431Sdim }, 1635245431Sdim { 1636245431Sdim 0, 1637245431Sdim X86::LOCK_INC8m, 1638245431Sdim 0, 1639245431Sdim 0, 1640245431Sdim X86::LOCK_INC16m, 1641245431Sdim 0, 1642245431Sdim 0, 1643245431Sdim X86::LOCK_INC32m, 1644245431Sdim 0, 1645245431Sdim 0, 1646245431Sdim X86::LOCK_INC64m, 1647245431Sdim }, 1648245431Sdim { 1649245431Sdim 0, 1650245431Sdim X86::LOCK_DEC8m, 1651245431Sdim 0, 1652245431Sdim 0, 1653245431Sdim X86::LOCK_DEC16m, 1654245431Sdim 0, 1655245431Sdim 0, 1656245431Sdim X86::LOCK_DEC32m, 1657245431Sdim 0, 1658245431Sdim 0, 1659245431Sdim X86::LOCK_DEC64m, 1660245431Sdim }, 1661245431Sdim { 1662223017Sdim X86::LOCK_OR8mi, 1663223017Sdim X86::LOCK_OR8mr, 1664223017Sdim X86::LOCK_OR16mi8, 1665223017Sdim X86::LOCK_OR16mi, 1666223017Sdim X86::LOCK_OR16mr, 1667223017Sdim X86::LOCK_OR32mi8, 1668223017Sdim X86::LOCK_OR32mi, 1669223017Sdim X86::LOCK_OR32mr, 1670223017Sdim X86::LOCK_OR64mi8, 1671223017Sdim X86::LOCK_OR64mi32, 1672245431Sdim X86::LOCK_OR64mr, 1673223017Sdim }, 1674223017Sdim { 1675223017Sdim X86::LOCK_AND8mi, 1676223017Sdim 
X86::LOCK_AND8mr, 1677223017Sdim X86::LOCK_AND16mi8, 1678223017Sdim X86::LOCK_AND16mi, 1679223017Sdim X86::LOCK_AND16mr, 1680223017Sdim X86::LOCK_AND32mi8, 1681223017Sdim X86::LOCK_AND32mi, 1682223017Sdim X86::LOCK_AND32mr, 1683223017Sdim X86::LOCK_AND64mi8, 1684223017Sdim X86::LOCK_AND64mi32, 1685245431Sdim X86::LOCK_AND64mr, 1686223017Sdim }, 1687223017Sdim { 1688223017Sdim X86::LOCK_XOR8mi, 1689223017Sdim X86::LOCK_XOR8mr, 1690223017Sdim X86::LOCK_XOR16mi8, 1691223017Sdim X86::LOCK_XOR16mi, 1692223017Sdim X86::LOCK_XOR16mr, 1693223017Sdim X86::LOCK_XOR32mi8, 1694223017Sdim X86::LOCK_XOR32mi, 1695223017Sdim X86::LOCK_XOR32mr, 1696223017Sdim X86::LOCK_XOR64mi8, 1697223017Sdim X86::LOCK_XOR64mi32, 1698245431Sdim X86::LOCK_XOR64mr, 1699223017Sdim } 1700223017Sdim}; 1701223017Sdim 1702245431Sdim// Return the target constant operand for atomic-load-op and do simple 1703245431Sdim// translations, such as from atomic-load-add to lock-sub. The return value is 1704245431Sdim// one of the following 3 cases: 1705245431Sdim// + target-constant, the operand could be supported as a target constant. 1706245431Sdim// + empty, the operand is not needed any more with the new op selected. 1707245431Sdim// + non-empty, otherwise. 1708245431Sdimstatic SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, 1709263509Sdim SDLoc dl, 1710263509Sdim enum AtomicOpc &Op, MVT NVT, 1711245431Sdim SDValue Val) { 1712245431Sdim if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) { 1713245431Sdim int64_t CNVal = CN->getSExtValue(); 1714245431Sdim // Quit if not 32-bit imm. 1715245431Sdim if ((int32_t)CNVal != CNVal) 1716245431Sdim return Val; 1717245431Sdim // For atomic-load-add, we could do some optimizations. 1718245431Sdim if (Op == ADD) { 1719245431Sdim // Translate to INC/DEC if ADD by 1 or -1. 1720245431Sdim if ((CNVal == 1) || (CNVal == -1)) { 1721245431Sdim Op = (CNVal == 1) ? INC : DEC; 1722245431Sdim // No more constant operand after being translated into INC/DEC. 
1723245431Sdim return SDValue(); 1724245431Sdim } 1725245431Sdim // Translate to SUB if ADD by negative value. 1726245431Sdim if (CNVal < 0) { 1727245431Sdim Op = SUB; 1728245431Sdim CNVal = -CNVal; 1729245431Sdim } 1730245431Sdim } 1731245431Sdim return CurDAG->getTargetConstant(CNVal, NVT); 1732245431Sdim } 1733245431Sdim 1734245431Sdim // If the value operand is single-used, try to optimize it. 1735245431Sdim if (Op == ADD && Val.hasOneUse()) { 1736245431Sdim // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). 1737245431Sdim if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { 1738245431Sdim Op = SUB; 1739245431Sdim return Val.getOperand(1); 1740245431Sdim } 1741245431Sdim // A special case for i16, which needs truncating as, in most cases, it's 1742245431Sdim // promoted to i32. We will translate 1743245431Sdim // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) 1744245431Sdim if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && 1745245431Sdim Val.getOperand(0).getOpcode() == ISD::SUB && 1746245431Sdim X86::isZeroNode(Val.getOperand(0).getOperand(0))) { 1747245431Sdim Op = SUB; 1748245431Sdim Val = Val.getOperand(0); 1749245431Sdim return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, 1750245431Sdim Val.getOperand(1)); 1751245431Sdim } 1752245431Sdim } 1753245431Sdim 1754245431Sdim return Val; 1755245431Sdim} 1756245431Sdim 1757263509SdimSDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { 1758223017Sdim if (Node->hasAnyUseOfValue(0)) 1759223017Sdim return 0; 1760245431Sdim 1761263509Sdim SDLoc dl(Node); 1762245431Sdim 1763223017Sdim // Optimize common patterns for __sync_or_and_fetch and similar arith 1764223017Sdim // operations where the result is not used. This allows us to use the "lock" 1765223017Sdim // version of the arithmetic instruction. 
1766223017Sdim SDValue Chain = Node->getOperand(0); 1767223017Sdim SDValue Ptr = Node->getOperand(1); 1768223017Sdim SDValue Val = Node->getOperand(2); 1769223017Sdim SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1770223017Sdim if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1771223017Sdim return 0; 1772223017Sdim 1773223017Sdim // Which index into the table. 1774223017Sdim enum AtomicOpc Op; 1775223017Sdim switch (Node->getOpcode()) { 1776245431Sdim default: 1777245431Sdim return 0; 1778223017Sdim case ISD::ATOMIC_LOAD_OR: 1779223017Sdim Op = OR; 1780223017Sdim break; 1781223017Sdim case ISD::ATOMIC_LOAD_AND: 1782223017Sdim Op = AND; 1783223017Sdim break; 1784223017Sdim case ISD::ATOMIC_LOAD_XOR: 1785223017Sdim Op = XOR; 1786223017Sdim break; 1787245431Sdim case ISD::ATOMIC_LOAD_ADD: 1788245431Sdim Op = ADD; 1789245431Sdim break; 1790223017Sdim } 1791252723Sdim 1792245431Sdim Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); 1793245431Sdim bool isUnOp = !Val.getNode(); 1794245431Sdim bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); 1795245431Sdim 1796223017Sdim unsigned Opc = 0; 1797263509Sdim switch (NVT.SimpleTy) { 1798223017Sdim default: return 0; 1799223017Sdim case MVT::i8: 1800223017Sdim if (isCN) 1801223017Sdim Opc = AtomicOpcTbl[Op][ConstantI8]; 1802223017Sdim else 1803223017Sdim Opc = AtomicOpcTbl[Op][I8]; 1804223017Sdim break; 1805223017Sdim case MVT::i16: 1806223017Sdim if (isCN) { 1807223017Sdim if (immSext8(Val.getNode())) 1808223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI16]; 1809223017Sdim else 1810223017Sdim Opc = AtomicOpcTbl[Op][ConstantI16]; 1811223017Sdim } else 1812223017Sdim Opc = AtomicOpcTbl[Op][I16]; 1813223017Sdim break; 1814223017Sdim case MVT::i32: 1815223017Sdim if (isCN) { 1816223017Sdim if (immSext8(Val.getNode())) 1817223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI32]; 1818223017Sdim else 1819223017Sdim Opc = AtomicOpcTbl[Op][ConstantI32]; 1820223017Sdim } else 1821223017Sdim Opc = 
AtomicOpcTbl[Op][I32]; 1822223017Sdim break; 1823223017Sdim case MVT::i64: 1824224145Sdim Opc = AtomicOpcTbl[Op][I64]; 1825223017Sdim if (isCN) { 1826223017Sdim if (immSext8(Val.getNode())) 1827223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI64]; 1828223017Sdim else if (i64immSExt32(Val.getNode())) 1829223017Sdim Opc = AtomicOpcTbl[Op][ConstantI64]; 1830224145Sdim } 1831223017Sdim break; 1832223017Sdim } 1833245431Sdim 1834224145Sdim assert(Opc != 0 && "Invalid arith lock transform!"); 1835224145Sdim 1836245431Sdim SDValue Ret; 1837223017Sdim SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 1838223017Sdim dl, NVT), 0); 1839223017Sdim MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1840223017Sdim MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1841245431Sdim if (isUnOp) { 1842245431Sdim SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; 1843252723Sdim Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 1844245431Sdim } else { 1845245431Sdim SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; 1846252723Sdim Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 1847245431Sdim } 1848223017Sdim cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1849223017Sdim SDValue RetVals[] = { Undef, Ret }; 1850223017Sdim return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1851223017Sdim} 1852223017Sdim 1853198090Srdivacky/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has 1854198090Srdivacky/// any uses which require the SF or OF bits to be accurate. 1855198090Srdivackystatic bool HasNoSignedComparisonUses(SDNode *N) { 1856198090Srdivacky // Examine each user of the node. 1857198090Srdivacky for (SDNode::use_iterator UI = N->use_begin(), 1858198090Srdivacky UE = N->use_end(); UI != UE; ++UI) { 1859198090Srdivacky // Only examine CopyToReg uses. 
1860198090Srdivacky if (UI->getOpcode() != ISD::CopyToReg) 1861198090Srdivacky return false; 1862198090Srdivacky // Only examine CopyToReg uses that copy to EFLAGS. 1863198090Srdivacky if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != 1864198090Srdivacky X86::EFLAGS) 1865198090Srdivacky return false; 1866198090Srdivacky // Examine each user of the CopyToReg use. 1867198090Srdivacky for (SDNode::use_iterator FlagUI = UI->use_begin(), 1868198090Srdivacky FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { 1869198090Srdivacky // Only examine the Flag result. 1870198090Srdivacky if (FlagUI.getUse().getResNo() != 1) continue; 1871198090Srdivacky // Anything unusual: assume conservatively. 1872198090Srdivacky if (!FlagUI->isMachineOpcode()) return false; 1873198090Srdivacky // Examine the opcode of the user. 1874198090Srdivacky switch (FlagUI->getMachineOpcode()) { 1875198090Srdivacky // These comparisons don't treat the most significant bit specially. 1876198090Srdivacky case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: 1877198090Srdivacky case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: 1878198090Srdivacky case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: 1879198090Srdivacky case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: 1880203954Srdivacky case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: 1881203954Srdivacky case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: 1882198090Srdivacky case X86::CMOVA16rr: case X86::CMOVA16rm: 1883198090Srdivacky case X86::CMOVA32rr: case X86::CMOVA32rm: 1884198090Srdivacky case X86::CMOVA64rr: case X86::CMOVA64rm: 1885198090Srdivacky case X86::CMOVAE16rr: case X86::CMOVAE16rm: 1886198090Srdivacky case X86::CMOVAE32rr: case X86::CMOVAE32rm: 1887198090Srdivacky case X86::CMOVAE64rr: case X86::CMOVAE64rm: 1888198090Srdivacky case X86::CMOVB16rr: case X86::CMOVB16rm: 1889198090Srdivacky case X86::CMOVB32rr: case 
X86::CMOVB32rm: 1890198090Srdivacky case X86::CMOVB64rr: case X86::CMOVB64rm: 1891198090Srdivacky case X86::CMOVBE16rr: case X86::CMOVBE16rm: 1892198090Srdivacky case X86::CMOVBE32rr: case X86::CMOVBE32rm: 1893198090Srdivacky case X86::CMOVBE64rr: case X86::CMOVBE64rm: 1894198090Srdivacky case X86::CMOVE16rr: case X86::CMOVE16rm: 1895198090Srdivacky case X86::CMOVE32rr: case X86::CMOVE32rm: 1896198090Srdivacky case X86::CMOVE64rr: case X86::CMOVE64rm: 1897198090Srdivacky case X86::CMOVNE16rr: case X86::CMOVNE16rm: 1898198090Srdivacky case X86::CMOVNE32rr: case X86::CMOVNE32rm: 1899198090Srdivacky case X86::CMOVNE64rr: case X86::CMOVNE64rm: 1900198090Srdivacky case X86::CMOVNP16rr: case X86::CMOVNP16rm: 1901198090Srdivacky case X86::CMOVNP32rr: case X86::CMOVNP32rm: 1902198090Srdivacky case X86::CMOVNP64rr: case X86::CMOVNP64rm: 1903198090Srdivacky case X86::CMOVP16rr: case X86::CMOVP16rm: 1904198090Srdivacky case X86::CMOVP32rr: case X86::CMOVP32rm: 1905198090Srdivacky case X86::CMOVP64rr: case X86::CMOVP64rm: 1906198090Srdivacky continue; 1907198090Srdivacky // Anything else: assume conservatively. 1908198090Srdivacky default: return false; 1909198090Srdivacky } 1910198090Srdivacky } 1911198090Srdivacky } 1912198090Srdivacky return true; 1913198090Srdivacky} 1914198090Srdivacky 1915235633Sdim/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode 1916235633Sdim/// is suitable for doing the {load; increment or decrement; store} to modify 1917235633Sdim/// transformation. 1918245431Sdimstatic bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, 1919235633Sdim SDValue StoredVal, SelectionDAG *CurDAG, 1920235633Sdim LoadSDNode* &LoadNode, SDValue &InputChain) { 1921235633Sdim 1922235633Sdim // is the value stored the result of a DEC or INC? 1923235633Sdim if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; 1924235633Sdim 1925235633Sdim // is the stored value result 0 of the load? 
1926235633Sdim if (StoredVal.getResNo() != 0) return false; 1927235633Sdim 1928235633Sdim // are there other uses of the loaded value than the inc or dec? 1929235633Sdim if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; 1930235633Sdim 1931235633Sdim // is the store non-extending and non-indexed? 1932235633Sdim if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) 1933235633Sdim return false; 1934235633Sdim 1935235633Sdim SDValue Load = StoredVal->getOperand(0); 1936235633Sdim // Is the stored value a non-extending and non-indexed load? 1937235633Sdim if (!ISD::isNormalLoad(Load.getNode())) return false; 1938235633Sdim 1939235633Sdim // Return LoadNode by reference. 1940235633Sdim LoadNode = cast<LoadSDNode>(Load); 1941235633Sdim // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) 1942245431Sdim EVT LdVT = LoadNode->getMemoryVT(); 1943245431Sdim if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && 1944235633Sdim LdVT != MVT::i8) 1945235633Sdim return false; 1946235633Sdim 1947235633Sdim // Is store the only read of the loaded value? 1948235633Sdim if (!Load.hasOneUse()) 1949235633Sdim return false; 1950245431Sdim 1951235633Sdim // Is the address of the store the same as the load? 1952235633Sdim if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || 1953235633Sdim LoadNode->getOffset() != StoreNode->getOffset()) 1954235633Sdim return false; 1955235633Sdim 1956235633Sdim // Check if the chain is produced by the load or is a TokenFactor with 1957235633Sdim // the load output chain as an operand. Return InputChain by reference. 
1958235633Sdim SDValue Chain = StoreNode->getChain(); 1959235633Sdim 1960235633Sdim bool ChainCheck = false; 1961235633Sdim if (Chain == Load.getValue(1)) { 1962235633Sdim ChainCheck = true; 1963235633Sdim InputChain = LoadNode->getChain(); 1964235633Sdim } else if (Chain.getOpcode() == ISD::TokenFactor) { 1965235633Sdim SmallVector<SDValue, 4> ChainOps; 1966235633Sdim for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { 1967235633Sdim SDValue Op = Chain.getOperand(i); 1968235633Sdim if (Op == Load.getValue(1)) { 1969235633Sdim ChainCheck = true; 1970235633Sdim continue; 1971235633Sdim } 1972245431Sdim 1973245431Sdim // Make sure using Op as part of the chain would not cause a cycle here. 1974245431Sdim // In theory, we could check whether the chain node is a predecessor of 1975245431Sdim // the load. But that can be very expensive. Instead visit the uses and 1976245431Sdim // make sure they all have smaller node id than the load. 1977245431Sdim int LoadId = LoadNode->getNodeId(); 1978245431Sdim for (SDNode::use_iterator UI = Op.getNode()->use_begin(), 1979245431Sdim UE = UI->use_end(); UI != UE; ++UI) { 1980245431Sdim if (UI.getUse().getResNo() != 0) 1981245431Sdim continue; 1982245431Sdim if (UI->getNodeId() > LoadId) 1983245431Sdim return false; 1984245431Sdim } 1985245431Sdim 1986235633Sdim ChainOps.push_back(Op); 1987235633Sdim } 1988235633Sdim 1989235633Sdim if (ChainCheck) 1990235633Sdim // Make a new TokenFactor with all the other input chains except 1991235633Sdim // for the load. 1992263509Sdim InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), 1993235633Sdim MVT::Other, &ChainOps[0], ChainOps.size()); 1994235633Sdim } 1995235633Sdim if (!ChainCheck) 1996235633Sdim return false; 1997235633Sdim 1998235633Sdim return true; 1999235633Sdim} 2000235633Sdim 2001235633Sdim/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory 2002235633Sdim/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. 
2003235633Sdimstatic unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { 2004235633Sdim if (Opc == X86ISD::DEC) { 2005235633Sdim if (LdVT == MVT::i64) return X86::DEC64m; 2006235633Sdim if (LdVT == MVT::i32) return X86::DEC32m; 2007235633Sdim if (LdVT == MVT::i16) return X86::DEC16m; 2008235633Sdim if (LdVT == MVT::i8) return X86::DEC8m; 2009235633Sdim } else { 2010235633Sdim assert(Opc == X86ISD::INC && "unrecognized opcode"); 2011235633Sdim if (LdVT == MVT::i64) return X86::INC64m; 2012235633Sdim if (LdVT == MVT::i32) return X86::INC32m; 2013235633Sdim if (LdVT == MVT::i16) return X86::INC16m; 2014235633Sdim if (LdVT == MVT::i8) return X86::INC8m; 2015235633Sdim } 2016235633Sdim llvm_unreachable("unrecognized size for LdVT"); 2017235633Sdim} 2018235633Sdim 2019245431Sdim/// SelectGather - Customized ISel for GATHER operations. 2020245431Sdim/// 2021245431SdimSDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { 2022245431Sdim // Operands of Gather: VSrc, Base, VIdx, VMask, Scale 2023245431Sdim SDValue Chain = Node->getOperand(0); 2024245431Sdim SDValue VSrc = Node->getOperand(2); 2025245431Sdim SDValue Base = Node->getOperand(3); 2026245431Sdim SDValue VIdx = Node->getOperand(4); 2027245431Sdim SDValue VMask = Node->getOperand(5); 2028245431Sdim ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); 2029245431Sdim if (!Scale) 2030245431Sdim return 0; 2031245431Sdim 2032245431Sdim SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), 2033245431Sdim MVT::Other); 2034245431Sdim 2035245431Sdim // Memory Operands: Base, Scale, Index, Disp, Segment 2036245431Sdim SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); 2037245431Sdim SDValue Segment = CurDAG->getRegister(0, MVT::i32); 2038245431Sdim const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, 2039245431Sdim Disp, Segment, VMask, Chain}; 2040263509Sdim SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops); 2041245431Sdim // 
  // Node has 2 outputs: VDst and MVT::Other.
  // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
  // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
  // of ResNode.
  ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
  return ResNode;
}

/// Select - Main x86 instruction-selection hook.  Hand-selects the node
/// kinds that the TableGen'erated matcher cannot express (AVX2 gathers,
/// 64-bit atomic pseudos, immediate shrinking for AND/OR/XOR, widening
/// multiplies, divides, CMP/SUB-to-TEST narrowing, and load-op-store
/// fusion) and falls through to SelectCode() for everything else.
/// Returning NULL means the node was selected manually and all of its
/// result values have already been rewired with ReplaceUses.
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
  MVT NVT = Node->getSimpleValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();
  SDLoc dl(Node);

  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== ";  Node->dump(CurDAG); dbgs() << '\n');
    // Mark the node so the scheduler knows it is already selected.
    Node->setNodeId(-1);
    return NULL;   // Already selected.
  }

  switch (Opcode) {
  default: break;
  case ISD::INTRINSIC_W_CHAIN: {
    // Map the AVX2 gather intrinsics directly onto the VGATHER/VPGATHER
    // machine instructions; SelectGather builds the machine node and
    // rewires the uses itself.
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_avx2_gather_d_pd:
    case Intrinsic::x86_avx2_gather_d_pd_256:
    case Intrinsic::x86_avx2_gather_q_pd:
    case Intrinsic::x86_avx2_gather_q_pd_256:
    case Intrinsic::x86_avx2_gather_d_ps:
    case Intrinsic::x86_avx2_gather_d_ps_256:
    case Intrinsic::x86_avx2_gather_q_ps:
    case Intrinsic::x86_avx2_gather_q_ps_256:
    case Intrinsic::x86_avx2_gather_d_q:
    case Intrinsic::x86_avx2_gather_d_q_256:
    case Intrinsic::x86_avx2_gather_q_q:
    case Intrinsic::x86_avx2_gather_q_q_256:
    case Intrinsic::x86_avx2_gather_d_d:
    case Intrinsic::x86_avx2_gather_d_d_256:
    case Intrinsic::x86_avx2_gather_q_d:
    case Intrinsic::x86_avx2_gather_q_d_256: {
      if (!Subtarget->hasAVX2())
        break;
      unsigned Opc;
      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_avx2_gather_d_pd:     Opc = X86::VGATHERDPDrm;  break;
      case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
      case Intrinsic::x86_avx2_gather_q_pd:     Opc = X86::VGATHERQPDrm;  break;
      case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
      case Intrinsic::x86_avx2_gather_d_ps:     Opc = X86::VGATHERDPSrm;  break;
      case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
      case Intrinsic::x86_avx2_gather_q_ps:     Opc = X86::VGATHERQPSrm;  break;
      case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
      case Intrinsic::x86_avx2_gather_d_q:      Opc = X86::VPGATHERDQrm;  break;
      case Intrinsic::x86_avx2_gather_d_q_256:  Opc = X86::VPGATHERDQYrm; break;
      case Intrinsic::x86_avx2_gather_q_q:      Opc = X86::VPGATHERQQrm;  break;
      case Intrinsic::x86_avx2_gather_q_q_256:  Opc = X86::VPGATHERQQYrm; break;
      case Intrinsic::x86_avx2_gather_d_d:      Opc = X86::VPGATHERDDrm;  break;
      case Intrinsic::x86_avx2_gather_d_d_256:  Opc = X86::VPGATHERDDYrm; break;
      case Intrinsic::x86_avx2_gather_q_d:      Opc = X86::VPGATHERQDrm;  break;
      case Intrinsic::x86_avx2_gather_q_d_256:  Opc = X86::VPGATHERQDYrm; break;
      }
      SDNode *RetVal = SelectGather(Node, Opc);
      if (RetVal)
        // We already called ReplaceUses inside SelectGather.
        return NULL;
      break;
    }
    }
    break;
  }
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();


  case X86ISD::ATOMOR64_DAG:
  case X86ISD::ATOMXOR64_DAG:
  case X86ISD::ATOMADD64_DAG:
  case X86ISD::ATOMSUB64_DAG:
  case X86ISD::ATOMNAND64_DAG:
  case X86ISD::ATOMAND64_DAG:
  case X86ISD::ATOMMAX64_DAG:
  case X86ISD::ATOMMIN64_DAG:
  case X86ISD::ATOMUMAX64_DAG:
  case X86ISD::ATOMUMIN64_DAG:
  case X86ISD::ATOMSWAP64_DAG: {
    // 64-bit atomic RMW on 32-bit targets: lower to the *6432 pseudo
    // instructions, which are expanded later.
    unsigned Opc;
    switch (Opcode) {
    default: llvm_unreachable("Impossible opcode");
    case X86ISD::ATOMOR64_DAG:   Opc = X86::ATOMOR6432;   break;
    case X86ISD::ATOMXOR64_DAG:  Opc = X86::ATOMXOR6432;  break;
    case X86ISD::ATOMADD64_DAG:  Opc = X86::ATOMADD6432;  break;
    case X86ISD::ATOMSUB64_DAG:  Opc = X86::ATOMSUB6432;  break;
    case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
    case X86ISD::ATOMAND64_DAG:  Opc = X86::ATOMAND6432;  break;
    case X86ISD::ATOMMAX64_DAG:  Opc = X86::ATOMMAX6432;  break;
    case X86ISD::ATOMMIN64_DAG:  Opc = X86::ATOMMIN6432;  break;
    case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
    case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
    case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
    }
    SDNode *RetVal = SelectAtomic64(Node, Opc);
    if (RetVal)
      return RetVal;
    break;
  }

  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_ADD: {
    // Try to select a RMW-to-memory form when the loaded value is unused;
    // falls back to the TableGen patterns if SelectAtomicLoadArith fails.
    SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
    if (RetVal)
      return RetVal;
    break;
  }
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR: {
    // For operations of the form (x << C1) op C2, check if we can use a smaller
    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
      break;

    // i8 is unshrinkable, i16 should be promoted to i32.
    if (NVT != MVT::i32 && NVT != MVT::i64)
      break;

    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!Cst || !ShlCst)
      break;

    int64_t Val = Cst->getSExtValue();
    uint64_t ShlVal = ShlCst->getZExtValue();

    // Make sure that we don't change the operation by removing bits.
    // This only matters for OR and XOR, AND is unaffected.
    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
      break;

    unsigned ShlOp, Op;
    MVT CstVT = NVT;

    // Check the minimum bitwidth for the new constant.
    // TODO: AND32ri is the same as AND64ri32 with zext imm.
    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
      CstVT = MVT::i8;
    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
      CstVT = MVT::i32;

    // Bail if there is no smaller encoding.
    if (NVT == CstVT)
      break;

    switch (NVT.SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i32:
      assert(CstVT == MVT::i8);
      ShlOp = X86::SHL32ri;

      switch (Opcode) {
      default: llvm_unreachable("Impossible opcode");
      case ISD::AND: Op = X86::AND32ri8; break;
      case ISD::OR:  Op =  X86::OR32ri8; break;
      case ISD::XOR: Op = X86::XOR32ri8; break;
      }
      break;
    case MVT::i64:
      assert(CstVT == MVT::i8 || CstVT == MVT::i32);
      ShlOp = X86::SHL64ri;

      switch (Opcode) {
      default: llvm_unreachable("Impossible opcode");
      case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
      case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
      case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
      }
      break;
    }

    // Emit the smaller op and the shift.
    SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
    SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
    return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
                                getI8Imm(ShlVal));
  }
  case X86ISD::UMUL: {
    // Widening unsigned multiply: MUL reads its implicit operand from
    // AL/AX/EAX/RAX and produces lo, hi, and EFLAGS (MVT::i32 here).
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    unsigned LoReg;
    switch (NVT.SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  Opc = X86::MUL8r; break;
    case MVT::i16: LoReg = X86::AX;  Opc = X86::MUL16r; break;
    case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
    case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
    }

    // Glue the copy of N0 into the implicit input register to the MUL.
    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
                                          N0, SDValue()).getValue(1);

    SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
    SDValue Ops[] = {N1, InFlag};
    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);

    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
    ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
    return NULL;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    bool hasBMI2 = Subtarget->hasBMI2();
    // Unsigned 32/64-bit multiplies prefer MULX (BMI2): it takes an
    // explicit source in EDX/RDX and does not clobber EFLAGS.
    if (!isSigned) {
      switch (NVT.SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
                     MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
      case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
                     MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
      }
    } else {
      switch (NVT.SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }
    }

    // SrcReg is the implicit input register; LoReg/HiReg are the implicit
    // result registers (0 for MULX, whose results are explicit outputs).
    unsigned SrcReg, LoReg, HiReg;
    switch (Opc) {
    default: llvm_unreachable("Unknown MUL opcode!");
    case X86::IMUL8r:
    case X86::MUL8r:
      SrcReg = LoReg = X86::AL; HiReg = X86::AH;
      break;
    case X86::IMUL16r:
    case X86::MUL16r:
      SrcReg = LoReg = X86::AX; HiReg = X86::DX;
      break;
    case X86::IMUL32r:
    case X86::MUL32r:
      SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
      break;
    case X86::IMUL64r:
    case X86::MUL64r:
      SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
      break;
    case X86::MULX32rr:
      SrcReg = X86::EDX; LoReg = HiReg = 0;
      break;
    case X86::MULX64rr:
      SrcReg = X86::RDX; LoReg = HiReg = 0;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commmutative.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
                                          N0, SDValue()).getValue(1);
    SDValue ResHi, ResLo;

    if (foldedLoad) {
      SDValue Chain;
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
        // MULX yields both halves as explicit results: (hi, lo, ch, glue).
        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
        ResHi = SDValue(CNode, 0);
        ResLo = SDValue(CNode, 1);
        Chain = SDValue(CNode, 2);
        InFlag = SDValue(CNode, 3);
      } else {
        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
        Chain = SDValue(CNode, 0);
        InFlag = SDValue(CNode, 1);
      }

      // Update the chain.
      ReplaceUses(N1.getValue(1), Chain);
    } else {
      SDValue Ops[] = { N1, InFlag };
      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
        ResHi = SDValue(CNode, 0);
        ResLo = SDValue(CNode, 1);
        InFlag = SDValue(CNode, 2);
      } else {
        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
        InFlag = SDValue(CNode, 0);
      }
    }

    // Prevent use of AH in a REX instruction by referencing AX instead.
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);
      // Get the low part if needed. Don't use getCopyFromReg for aliasing
      // registers.
      // NOTE(review): the comment says "low part" but this replaces result 1
      // (the high half); the analogous DIVREM code below replaces result 0.
      // Looks like it should be SDValue(Node, 0) -- TODO confirm against
      // upstream LLVM before changing.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 1),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX down 8 bits.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                   CurDAG->getTargetConstant(8, MVT::i8)), 0);
      // Then truncate it down to i8.
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the low half of the result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      if (ResLo.getNode() == 0) {
        assert(LoReg && "Register for low half is not defined!");
        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
                                       InFlag);
        InFlag = ResLo.getValue(2);
      }
      ReplaceUses(SDValue(Node, 0), ResLo);
      DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the high half of the result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      if (ResHi.getNode() == 0) {
        assert(HiReg && "Register for high half is not defined!");
        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
                                       InFlag);
        InFlag = ResHi.getValue(2);
      }
      ReplaceUses(SDValue(Node, 1), ResHi);
      DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
    }

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned) {
      switch (NVT.SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    } else {
      switch (NVT.SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }
    }

    // LoReg/HiReg are the implicit dividend/result registers; ClrReg is the
    // register that must be zeroed for an unsigned divide; SExtOpcode
    // sign-extends LoReg into HiReg for a signed divide.
    unsigned LoReg, HiReg, ClrReg;
    unsigned SExtOpcode;
    switch (NVT.SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  ClrReg = HiReg = X86::AH;
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrReg = X86::DX;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
      SExtOpcode = X86::CQO;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
                                         MVT::Other, Ops), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        // MOV32r0 produces a 32-bit zero; narrow/widen it to the divide
        // width with EXTRACT_SUBREG / SUBREG_TO_REG as needed.
        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
        switch (NVT.SimpleTy) {
        case MVT::i16:
          ClrNode =
              SDValue(CurDAG->getMachineNode(
                          TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
                          CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)),
                      0);
          break;
        case MVT::i32:
          break;
        case MVT::i64:
          ClrNode =
              SDValue(CurDAG->getMachineNode(
                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
                          CurDAG->getTargetConstant(0, MVT::i64), ClrNode,
                          CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
                      0);
          break;
        default:
          llvm_unreachable("Unexpected division source");
        }

        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
    }

    // Prevent use of AH in a REX instruction by referencing AX instead.
    // Shift it down 8 bits.
    //
    // The current assumption of the register allocator is that isel
    // won't generate explicit references to the GPR8_NOREX registers. If
    // the allocator and/or the backend get enhanced to be more robust in
    // that regard, this can be, and should be, removed.
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);

      // If we also need AL (the quotient), get it by extracting a subreg from
      // Result. The fast register allocator does not like multiple CopyFromReg
      // nodes using aliasing registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 0),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX right by 8 bits instead of using AH.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)),
                       0);
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the division (low) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the remainder (high) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    return NULL;
  }

  case X86ISD::CMP:
  case X86ISD::SUB: {
    // Sometimes a SUB is used to perform comparison.
    if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
      // This node is not a CMP.
      break;
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
        HasNoSignedComparisonUses(Node))
      // Look past the truncate if CMP is the only use of it.
      N0 = N0.getOperand(0);
    if ((N0.getNode()->getOpcode() == ISD::AND ||
         (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
        N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8"
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          const TargetRegisterClass *TRC;
          switch (N0.getSimpleValueType().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                                 Subreg, Imm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return NULL;
      }

      // For example, "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.
        const TargetRegisterClass *TRC;
        switch (N0.getSimpleValueType().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
        // target GR8_NOREX registers, so make sure the register class is
        // forced.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
                                                 MVT::i32, Subreg, ShiftedImm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return NULL;
      }

      // For example, "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
                                                 Subreg, Imm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return NULL;
      }

      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
                                                 Subreg, Imm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return NULL;
      }
    }
    break;
  }
  case ISD::STORE: {
    // Change a chain of {load; incr or dec; store} of the same value into
    // a simple increment or decrement through memory of that value, if the
    // uses of the modified value and its address are suitable.
    // The DEC64m tablegen pattern is currently not able to match the case where
    // the EFLAGS on the original DEC are used. (This also applies to
    // {INC,DEC}X{64,32,16,8}.)
    // We'll need to improve tablegen to allow flags to be transferred from a
    // node in the pattern to the result node.  probably with a new keyword
    // for example, we have this
    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
    //   (implicit EFLAGS)]>;
    // but maybe need something like this
    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
    //   (transferrable EFLAGS)]>;

    StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
    SDValue StoredVal = StoreNode->getOperand(1);
    unsigned Opc = StoredVal->getOpcode();

    LoadSDNode *LoadNode = 0;
    SDValue InputChain;
    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
                             LoadNode, InputChain))
      break;

    SDValue Base, Scale, Index, Disp, Segment;
    if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
                    Base, Scale, Index, Disp, Segment))
      break;

    // Carry both memory operands (the store's and the folded load's) on the
    // fused instruction so alias analysis stays correct.
    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
    MemOp[0] = StoreNode->getMemOperand();
    MemOp[1] = LoadNode->getMemOperand();
    const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
    EVT LdVT = LoadNode->getMemoryVT();
    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
                                                   SDLoc(Node),
                                                   MVT::i32, MVT::Other, Ops);
    Result->setMemRefs(MemOp, MemOp + 2);

    // Result 1 is the chain (replaces the store); result 0 is EFLAGS
    // (replaces the arithmetic node's flag output).
    ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
    ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));

    return Result;
  }
  }

  // Not handled above: let the TableGen-generated matcher select it.
  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << '\n');

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Expand an inline-asm memory operand ('m'
/// constraint) into the five x86 address components (base, scale, index,
/// displacement, segment).  Returns true on failure, false on success, per
/// the SelectionDAGISel convention.
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}