X86ISelDAGToDAG.cpp revision 198892
1193323Sed//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file defines a DAG pattern matching instruction selector for X86, 11193323Sed// converting from a legalized dag to a X86 dag. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#define DEBUG_TYPE "x86-isel" 16193323Sed#include "X86.h" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86ISelLowering.h" 19193323Sed#include "X86MachineFunctionInfo.h" 20193323Sed#include "X86RegisterInfo.h" 21193323Sed#include "X86Subtarget.h" 22193323Sed#include "X86TargetMachine.h" 23193323Sed#include "llvm/GlobalValue.h" 24193323Sed#include "llvm/Instructions.h" 25193323Sed#include "llvm/Intrinsics.h" 26193323Sed#include "llvm/Support/CFG.h" 27193323Sed#include "llvm/Type.h" 28193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 29193323Sed#include "llvm/CodeGen/MachineFunction.h" 30193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 31193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 32193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 33193323Sed#include "llvm/CodeGen/SelectionDAGISel.h" 34193323Sed#include "llvm/Target/TargetMachine.h" 35193323Sed#include "llvm/Target/TargetOptions.h" 36193323Sed#include "llvm/Support/Debug.h" 37198090Srdivacky#include "llvm/Support/ErrorHandling.h" 38193323Sed#include "llvm/Support/MathExtras.h" 39198090Srdivacky#include "llvm/Support/raw_ostream.h" 40193323Sed#include "llvm/ADT/SmallPtrSet.h" 41193323Sed#include "llvm/ADT/Statistic.h" 42193323Sedusing namespace llvm; 43193323Sed 44193323Sed#include "llvm/Support/CommandLine.h" 45193323Sedstatic cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); 46193323Sed 47193323SedSTATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 48193323Sed 49193323Sed//===----------------------------------------------------------------------===// 50193323Sed// Pattern Matcher Implementation 51193323Sed//===----------------------------------------------------------------------===// 52193323Sed 53193323Sednamespace { 54193323Sed /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 55193323Sed /// SDValue's instead of register numbers for the leaves of the matched 56193323Sed /// tree. 57193323Sed struct X86ISelAddressMode { 58193323Sed enum { 59193323Sed RegBase, 60193323Sed FrameIndexBase 61193323Sed } BaseType; 62193323Sed 63193323Sed struct { // This is really a union, discriminated by BaseType! 64193323Sed SDValue Reg; 65193323Sed int FrameIndex; 66193323Sed } Base; 67193323Sed 68193323Sed unsigned Scale; 69193323Sed SDValue IndexReg; 70193323Sed int32_t Disp; 71193323Sed SDValue Segment; 72193323Sed GlobalValue *GV; 73193323Sed Constant *CP; 74198892Srdivacky BlockAddress *BlockAddr; 75193323Sed const char *ES; 76193323Sed int JT; 77193323Sed unsigned Align; // CP alignment. 78195098Sed unsigned char SymbolFlags; // X86II::MO_* 79193323Sed 80193323Sed X86ISelAddressMode() 81195098Sed : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), 82198892Srdivacky Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), 83198090Srdivacky SymbolFlags(X86II::MO_NO_FLAG) { 84193323Sed } 85193323Sed 86193323Sed bool hasSymbolicDisplacement() const { 87198892Srdivacky return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; 88193323Sed } 89195098Sed 90195098Sed bool hasBaseOrIndexReg() const { 91195098Sed return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; 92195098Sed } 93195098Sed 94195098Sed /// isRIPRelative - Return true if this addressing mode is already RIP 95195098Sed /// relative. 96195098Sed bool isRIPRelative() const { 97195098Sed if (BaseType != RegBase) return false; 98195098Sed if (RegisterSDNode *RegNode = 99195098Sed dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) 100195098Sed return RegNode->getReg() == X86::RIP; 101195098Sed return false; 102195098Sed } 103195098Sed 104195098Sed void setBaseReg(SDValue Reg) { 105195098Sed BaseType = RegBase; 106195098Sed Base.Reg = Reg; 107195098Sed } 108193323Sed 109193323Sed void dump() { 110198090Srdivacky errs() << "X86ISelAddressMode " << this << '\n'; 111198090Srdivacky errs() << "Base.Reg "; 112198090Srdivacky if (Base.Reg.getNode() != 0) 113198090Srdivacky Base.Reg.getNode()->dump(); 114198090Srdivacky else 115198090Srdivacky errs() << "nul"; 116198090Srdivacky errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' 117198090Srdivacky << " Scale" << Scale << '\n' 118198090Srdivacky << "IndexReg "; 119198090Srdivacky if (IndexReg.getNode() != 0) 120198090Srdivacky IndexReg.getNode()->dump(); 121198090Srdivacky else 122198090Srdivacky errs() << "nul"; 123198090Srdivacky errs() << " Disp " << Disp << '\n' 124198090Srdivacky << "GV "; 125198090Srdivacky if (GV) 126198090Srdivacky GV->dump(); 127198090Srdivacky else 128198090Srdivacky errs() << "nul"; 129198090Srdivacky errs() << " CP "; 130198090Srdivacky if (CP) 131198090Srdivacky CP->dump(); 132198090Srdivacky else 133198090Srdivacky errs() << "nul"; 134198090Srdivacky errs() << '\n' 135198090Srdivacky << "ES "; 136198090Srdivacky if (ES) 137198090Srdivacky errs() << ES; 138198090Srdivacky else 139198090Srdivacky errs() << "nul"; 140198090Srdivacky errs() << " JT" << JT << " Align" << Align << '\n'; 141193323Sed } 142193323Sed }; 143193323Sed} 144193323Sed 145193323Sednamespace { 146193323Sed //===--------------------------------------------------------------------===// 147193323Sed /// ISel - X86 specific code to select X86 machine instructions for 148193323Sed /// SelectionDAG operations. 149193323Sed /// 150198892Srdivacky class X86DAGToDAGISel : public SelectionDAGISel { 151193323Sed /// X86Lowering - This object fully describes how to lower LLVM code to an 152193323Sed /// X86-specific SelectionDAG. 153193323Sed X86TargetLowering &X86Lowering; 154193323Sed 155193323Sed /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 156193323Sed /// make the right decision when generating code for different targets. 157193323Sed const X86Subtarget *Subtarget; 158193323Sed 159193323Sed /// OptForSize - If true, selector should try to optimize for code size 160193323Sed /// instead of performance. 161193323Sed bool OptForSize; 162193323Sed 163193323Sed public: 164193323Sed explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) 165193323Sed : SelectionDAGISel(tm, OptLevel), 166193399Sed X86Lowering(*tm.getTargetLowering()), 167193399Sed Subtarget(&tm.getSubtarget<X86Subtarget>()), 168193323Sed OptForSize(false) {} 169193323Sed 170193323Sed virtual const char *getPassName() const { 171193323Sed return "X86 DAG->DAG Instruction Selection"; 172193323Sed } 173193323Sed 174193323Sed /// InstructionSelect - This callback is invoked by 175193323Sed /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 176193323Sed virtual void InstructionSelect(); 177193323Sed 178193323Sed virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); 179193323Sed 180193323Sed virtual 181193323Sed bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; 182193323Sed 183193323Sed// Include the pieces autogenerated from the target description. 184193323Sed#include "X86GenDAGISel.inc" 185193323Sed 186193323Sed private: 187193323Sed SDNode *Select(SDValue N); 188193323Sed SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); 189198090Srdivacky SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); 190193323Sed 191193323Sed bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); 192193323Sed bool MatchLoad(SDValue N, X86ISelAddressMode &AM); 193193323Sed bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); 194198090Srdivacky bool MatchAddress(SDValue N, X86ISelAddressMode &AM); 195198090Srdivacky bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 196198090Srdivacky unsigned Depth); 197193323Sed bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); 198193323Sed bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, 199193323Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 200193323Sed SDValue &Segment); 201193323Sed bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, 202193323Sed SDValue &Scale, SDValue &Index, SDValue &Disp); 203194612Sed bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 204194612Sed SDValue &Scale, SDValue &Index, SDValue &Disp); 205193323Sed bool SelectScalarSSELoad(SDValue Op, SDValue Pred, 206193323Sed SDValue N, SDValue &Base, SDValue &Scale, 207193323Sed SDValue &Index, SDValue &Disp, 208193323Sed SDValue &Segment, 209193323Sed SDValue &InChain, SDValue &OutChain); 210193323Sed bool TryFoldLoad(SDValue P, SDValue N, 211193323Sed SDValue &Base, SDValue &Scale, 212193323Sed SDValue &Index, SDValue &Disp, 213193323Sed SDValue &Segment); 214193323Sed void PreprocessForRMW(); 215193323Sed void PreprocessForFPConvert(); 216193323Sed 217193323Sed /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 218193323Sed /// inline asm expressions. 219193323Sed virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, 220193323Sed char ConstraintCode, 221193323Sed std::vector<SDValue> &OutOps); 222193323Sed 223193323Sed void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 224193323Sed 225193323Sed inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 226193323Sed SDValue &Scale, SDValue &Index, 227193323Sed SDValue &Disp, SDValue &Segment) { 228193323Sed Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 229193323Sed CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : 230193323Sed AM.Base.Reg; 231193323Sed Scale = getI8Imm(AM.Scale); 232193323Sed Index = AM.IndexReg; 233193323Sed // These are 32-bit even in 64-bit mode since RIP relative offset 234193323Sed // is 32-bit. 235193323Sed if (AM.GV) 236195098Sed Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, 237195098Sed AM.SymbolFlags); 238193323Sed else if (AM.CP) 239193323Sed Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 240195098Sed AM.Align, AM.Disp, AM.SymbolFlags); 241193323Sed else if (AM.ES) 242195098Sed Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); 243193323Sed else if (AM.JT != -1) 244195098Sed Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); 245198892Srdivacky else if (AM.BlockAddr) 246198892Srdivacky Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/, 247198892Srdivacky true /*AM.SymbolFlags*/); 248193323Sed else 249193323Sed Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); 250193323Sed 251193323Sed if (AM.Segment.getNode()) 252193323Sed Segment = AM.Segment; 253193323Sed else 254193323Sed Segment = CurDAG->getRegister(0, MVT::i32); 255193323Sed } 256193323Sed 257193323Sed /// getI8Imm - Return a target constant with the specified value, of type 258193323Sed /// i8. 259193323Sed inline SDValue getI8Imm(unsigned Imm) { 260193323Sed return CurDAG->getTargetConstant(Imm, MVT::i8); 261193323Sed } 262193323Sed 263193323Sed /// getI16Imm - Return a target constant with the specified value, of type 264193323Sed /// i16. 265193323Sed inline SDValue getI16Imm(unsigned Imm) { 266193323Sed return CurDAG->getTargetConstant(Imm, MVT::i16); 267193323Sed } 268193323Sed 269193323Sed /// getI32Imm - Return a target constant with the specified value, of type 270193323Sed /// i32. 271193323Sed inline SDValue getI32Imm(unsigned Imm) { 272193323Sed return CurDAG->getTargetConstant(Imm, MVT::i32); 273193323Sed } 274193323Sed 275193323Sed /// getGlobalBaseReg - Return an SDNode that returns the value of 276193323Sed /// the global base register. Output instructions required to 277193323Sed /// initialize the global base register, if necessary. 278193323Sed /// 279193323Sed SDNode *getGlobalBaseReg(); 280193323Sed 281193399Sed /// getTargetMachine - Return a reference to the TargetMachine, casted 282193399Sed /// to the target-specific type. 283193399Sed const X86TargetMachine &getTargetMachine() { 284193399Sed return static_cast<const X86TargetMachine &>(TM); 285193399Sed } 286193399Sed 287193399Sed /// getInstrInfo - Return a reference to the TargetInstrInfo, casted 288193399Sed /// to the target-specific type. 289193399Sed const X86InstrInfo *getInstrInfo() { 290193399Sed return getTargetMachine().getInstrInfo(); 291193399Sed } 292193399Sed 293193323Sed#ifndef NDEBUG 294193323Sed unsigned Indent; 295193323Sed#endif 296193323Sed }; 297193323Sed} 298193323Sed 299193323Sed 300193323Sedbool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, 301193323Sed SDNode *Root) const { 302193323Sed if (OptLevel == CodeGenOpt::None) return false; 303193323Sed 304193323Sed if (U == Root) 305193323Sed switch (U->getOpcode()) { 306193323Sed default: break; 307193323Sed case ISD::ADD: 308193323Sed case ISD::ADDC: 309193323Sed case ISD::ADDE: 310193323Sed case ISD::AND: 311193323Sed case ISD::OR: 312193323Sed case ISD::XOR: { 313193323Sed SDValue Op1 = U->getOperand(1); 314193323Sed 315193323Sed // If the other operand is a 8-bit immediate we should fold the immediate 316193323Sed // instead. This reduces code size. 317193323Sed // e.g. 318193323Sed // movl 4(%esp), %eax 319193323Sed // addl $4, %eax 320193323Sed // vs. 321193323Sed // movl $4, %eax 322193323Sed // addl 4(%esp), %eax 323193323Sed // The former is 2 bytes shorter. In case where the increment is 1, then 324193323Sed // the saving can be 4 bytes (by using incl %eax). 325193323Sed if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) 326193323Sed if (Imm->getAPIntValue().isSignedIntN(8)) 327193323Sed return false; 328193323Sed 329193323Sed // If the other operand is a TLS address, we should fold it instead. 330193323Sed // This produces 331193323Sed // movl %gs:0, %eax 332193323Sed // leal i@NTPOFF(%eax), %eax 333193323Sed // instead of 334193323Sed // movl $i@NTPOFF, %eax 335193323Sed // addl %gs:0, %eax 336193323Sed // if the block also has an access to a second TLS address this will save 337193323Sed // a load. 338193323Sed // FIXME: This is probably also true for non TLS addresses. 339193323Sed if (Op1.getOpcode() == X86ISD::Wrapper) { 340193323Sed SDValue Val = Op1.getOperand(0); 341193323Sed if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) 342193323Sed return false; 343193323Sed } 344193323Sed } 345193323Sed } 346193323Sed 347193323Sed // Proceed to 'generic' cycle finder code 348193323Sed return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); 349193323Sed} 350193323Sed 351193323Sed/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 352193323Sed/// and move load below the TokenFactor. Replace store's chain operand with 353193323Sed/// load's chain result. 354193323Sedstatic void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, 355193323Sed SDValue Store, SDValue TF) { 356193323Sed SmallVector<SDValue, 4> Ops; 357193323Sed for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) 358193323Sed if (Load.getNode() == TF.getOperand(i).getNode()) 359193323Sed Ops.push_back(Load.getOperand(0)); 360193323Sed else 361193323Sed Ops.push_back(TF.getOperand(i)); 362198090Srdivacky SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); 363198090Srdivacky SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, 364198090Srdivacky Load.getOperand(1), 365198090Srdivacky Load.getOperand(2)); 366198090Srdivacky CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), 367193323Sed Store.getOperand(2), Store.getOperand(3)); 368193323Sed} 369193323Sed 370198090Srdivacky/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The 371198090Srdivacky/// chain produced by the load must only be used by the store's chain operand, 372198090Srdivacky/// otherwise this may produce a cycle in the DAG. 373193323Sed/// 374193323Sedstatic bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, 375193323Sed SDValue &Load) { 376193323Sed if (N.getOpcode() == ISD::BIT_CONVERT) 377193323Sed N = N.getOperand(0); 378193323Sed 379193323Sed LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 380193323Sed if (!LD || LD->isVolatile()) 381193323Sed return false; 382193323Sed if (LD->getAddressingMode() != ISD::UNINDEXED) 383193323Sed return false; 384193323Sed 385193323Sed ISD::LoadExtType ExtType = LD->getExtensionType(); 386193323Sed if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) 387193323Sed return false; 388193323Sed 389193323Sed if (N.hasOneUse() && 390198090Srdivacky LD->hasNUsesOfValue(1, 1) && 391193323Sed N.getOperand(1) == Address && 392198090Srdivacky LD->isOperandOf(Chain.getNode())) { 393193323Sed Load = N; 394193323Sed return true; 395193323Sed } 396193323Sed return false; 397193323Sed} 398193323Sed 399193323Sed/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain 400193323Sed/// operand and move load below the call's chain operand. 401193323Sedstatic void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, 402193323Sed SDValue Call, SDValue CallSeqStart) { 403193323Sed SmallVector<SDValue, 8> Ops; 404193323Sed SDValue Chain = CallSeqStart.getOperand(0); 405193323Sed if (Chain.getNode() == Load.getNode()) 406193323Sed Ops.push_back(Load.getOperand(0)); 407193323Sed else { 408193323Sed assert(Chain.getOpcode() == ISD::TokenFactor && 409193323Sed "Unexpected CallSeqStart chain operand"); 410193323Sed for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) 411193323Sed if (Chain.getOperand(i).getNode() == Load.getNode()) 412193323Sed Ops.push_back(Load.getOperand(0)); 413193323Sed else 414193323Sed Ops.push_back(Chain.getOperand(i)); 415193323Sed SDValue NewChain = 416193323Sed CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(), 417193323Sed MVT::Other, &Ops[0], Ops.size()); 418193323Sed Ops.clear(); 419193323Sed Ops.push_back(NewChain); 420193323Sed } 421193323Sed for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i) 422193323Sed Ops.push_back(CallSeqStart.getOperand(i)); 423193323Sed CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size()); 424193323Sed CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), 425193323Sed Load.getOperand(1), Load.getOperand(2)); 426193323Sed Ops.clear(); 427193323Sed Ops.push_back(SDValue(Load.getNode(), 1)); 428193323Sed for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) 429193323Sed Ops.push_back(Call.getOperand(i)); 430193323Sed CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); 431193323Sed} 432193323Sed 433193323Sed/// isCalleeLoad - Return true if call address is a load and it can be 434193323Sed/// moved below CALLSEQ_START and the chains leading up to the call. 435193323Sed/// Return the CALLSEQ_START by reference as a second output. 436193323Sedstatic bool isCalleeLoad(SDValue Callee, SDValue &Chain) { 437193323Sed if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 438193323Sed return false; 439193323Sed LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 440193323Sed if (!LD || 441193323Sed LD->isVolatile() || 442193323Sed LD->getAddressingMode() != ISD::UNINDEXED || 443193323Sed LD->getExtensionType() != ISD::NON_EXTLOAD) 444193323Sed return false; 445193323Sed 446193323Sed // Now let's find the callseq_start. 447193323Sed while (Chain.getOpcode() != ISD::CALLSEQ_START) { 448193323Sed if (!Chain.hasOneUse()) 449193323Sed return false; 450193323Sed Chain = Chain.getOperand(0); 451193323Sed } 452193323Sed 453193323Sed if (Chain.getOperand(0).getNode() == Callee.getNode()) 454193323Sed return true; 455193323Sed if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && 456198090Srdivacky Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && 457198090Srdivacky Callee.getValue(1).hasOneUse()) 458193323Sed return true; 459193323Sed return false; 460193323Sed} 461193323Sed 462193323Sed 463193323Sed/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. 464193323Sed/// This is only run if not in -O0 mode. 465193323Sed/// This allows the instruction selector to pick more read-modify-write 466193323Sed/// instructions. This is a common case: 467193323Sed/// 468193323Sed/// [Load chain] 469193323Sed/// ^ 470193323Sed/// | 471193323Sed/// [Load] 472193323Sed/// ^ ^ 473193323Sed/// | | 474193323Sed/// / \- 475193323Sed/// / | 476193323Sed/// [TokenFactor] [Op] 477193323Sed/// ^ ^ 478193323Sed/// | | 479193323Sed/// \ / 480193323Sed/// \ / 481193323Sed/// [Store] 482193323Sed/// 483193323Sed/// The fact the store's chain operand != load's chain will prevent the 484193323Sed/// (store (op (load))) instruction from being selected. We can transform it to: 485193323Sed/// 486193323Sed/// [Load chain] 487193323Sed/// ^ 488193323Sed/// | 489193323Sed/// [TokenFactor] 490193323Sed/// ^ 491193323Sed/// | 492193323Sed/// [Load] 493193323Sed/// ^ ^ 494193323Sed/// | | 495193323Sed/// | \- 496193323Sed/// | | 497193323Sed/// | [Op] 498193323Sed/// | ^ 499193323Sed/// | | 500193323Sed/// \ / 501193323Sed/// \ / 502193323Sed/// [Store] 503193323Sedvoid X86DAGToDAGISel::PreprocessForRMW() { 504193323Sed for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 505193323Sed E = CurDAG->allnodes_end(); I != E; ++I) { 506193323Sed if (I->getOpcode() == X86ISD::CALL) { 507193323Sed /// Also try moving call address load from outside callseq_start to just 508193323Sed /// before the call to allow it to be folded. 509193323Sed /// 510193323Sed /// [Load chain] 511193323Sed /// ^ 512193323Sed /// | 513193323Sed /// [Load] 514193323Sed /// ^ ^ 515193323Sed /// | | 516193323Sed /// / \-- 517193323Sed /// / | 518193323Sed ///[CALLSEQ_START] | 519193323Sed /// ^ | 520193323Sed /// | | 521193323Sed /// [LOAD/C2Reg] | 522193323Sed /// | | 523193323Sed /// \ / 524193323Sed /// \ / 525193323Sed /// [CALL] 526193323Sed SDValue Chain = I->getOperand(0); 527193323Sed SDValue Load = I->getOperand(1); 528193323Sed if (!isCalleeLoad(Load, Chain)) 529193323Sed continue; 530193323Sed MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); 531193323Sed ++NumLoadMoved; 532193323Sed continue; 533193323Sed } 534193323Sed 535193323Sed if (!ISD::isNON_TRUNCStore(I)) 536193323Sed continue; 537193323Sed SDValue Chain = I->getOperand(0); 538193323Sed 539193323Sed if (Chain.getNode()->getOpcode() != ISD::TokenFactor) 540193323Sed continue; 541193323Sed 542193323Sed SDValue N1 = I->getOperand(1); 543193323Sed SDValue N2 = I->getOperand(2); 544193323Sed if ((N1.getValueType().isFloatingPoint() && 545193323Sed !N1.getValueType().isVector()) || 546193323Sed !N1.hasOneUse()) 547193323Sed continue; 548193323Sed 549193323Sed bool RModW = false; 550193323Sed SDValue Load; 551193323Sed unsigned Opcode = N1.getNode()->getOpcode(); 552193323Sed switch (Opcode) { 553193323Sed case ISD::ADD: 554193323Sed case ISD::MUL: 555193323Sed case ISD::AND: 556193323Sed case ISD::OR: 557193323Sed case ISD::XOR: 558193323Sed case ISD::ADDC: 559193323Sed case ISD::ADDE: 560193323Sed case ISD::VECTOR_SHUFFLE: { 561193323Sed SDValue N10 = N1.getOperand(0); 562193323Sed SDValue N11 = N1.getOperand(1); 563193323Sed RModW = isRMWLoad(N10, Chain, N2, Load); 564193323Sed if (!RModW) 565193323Sed RModW = isRMWLoad(N11, Chain, N2, Load); 566193323Sed break; 567193323Sed } 568193323Sed case ISD::SUB: 569193323Sed case ISD::SHL: 570193323Sed case ISD::SRA: 571193323Sed case ISD::SRL: 572193323Sed case ISD::ROTL: 573193323Sed case ISD::ROTR: 574193323Sed case ISD::SUBC: 575193323Sed case ISD::SUBE: 576193323Sed case X86ISD::SHLD: 577193323Sed case X86ISD::SHRD: { 578193323Sed SDValue N10 = N1.getOperand(0); 579193323Sed RModW = isRMWLoad(N10, Chain, N2, Load); 580193323Sed break; 581193323Sed } 582193323Sed } 583193323Sed 584193323Sed if (RModW) { 585193323Sed MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); 586193323Sed ++NumLoadMoved; 587193323Sed } 588193323Sed } 589193323Sed} 590193323Sed 591193323Sed 592193323Sed/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend 593193323Sed/// nodes that target the FP stack to be store and load to the stack. This is a 594193323Sed/// gross hack. We would like to simply mark these as being illegal, but when 595193323Sed/// we do that, legalize produces these when it expands calls, then expands 596193323Sed/// these in the same legalize pass. We would like dag combine to be able to 597193323Sed/// hack on these between the call expansion and the node legalization. As such 598193323Sed/// this pass basically does "really late" legalization of these inline with the 599193323Sed/// X86 isel pass. 600193323Sedvoid X86DAGToDAGISel::PreprocessForFPConvert() { 601193323Sed for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 602193323Sed E = CurDAG->allnodes_end(); I != E; ) { 603193323Sed SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 604193323Sed if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) 605193323Sed continue; 606193323Sed 607193323Sed // If the source and destination are SSE registers, then this is a legal 608193323Sed // conversion that should not be lowered. 609198090Srdivacky EVT SrcVT = N->getOperand(0).getValueType(); 610198090Srdivacky EVT DstVT = N->getValueType(0); 611193323Sed bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); 612193323Sed bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); 613193323Sed if (SrcIsSSE && DstIsSSE) 614193323Sed continue; 615193323Sed 616193323Sed if (!SrcIsSSE && !DstIsSSE) { 617193323Sed // If this is an FPStack extension, it is a noop. 618193323Sed if (N->getOpcode() == ISD::FP_EXTEND) 619193323Sed continue; 620193323Sed // If this is a value-preserving FPStack truncation, it is a noop. 621193323Sed if (N->getConstantOperandVal(1)) 622193323Sed continue; 623193323Sed } 624193323Sed 625193323Sed // Here we could have an FP stack truncation or an FPStack <-> SSE convert. 626193323Sed // FPStack has extload and truncstore. SSE can fold direct loads into other 627193323Sed // operations. Based on this, decide what we want to do. 628198090Srdivacky EVT MemVT; 629193323Sed if (N->getOpcode() == ISD::FP_ROUND) 630193323Sed MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 631193323Sed else 632193323Sed MemVT = SrcIsSSE ? SrcVT : DstVT; 633193323Sed 634193323Sed SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); 635193323Sed DebugLoc dl = N->getDebugLoc(); 636193323Sed 637193323Sed // FIXME: optimize the case where the src/dest is a load or store? 638193323Sed SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, 639193323Sed N->getOperand(0), 640193323Sed MemTmp, NULL, 0, MemVT); 641193323Sed SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, 642193323Sed NULL, 0, MemVT); 643193323Sed 644193323Sed // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 645193323Sed // extload we created. This will cause general havok on the dag because 646193323Sed // anything below the conversion could be folded into other existing nodes. 647193323Sed // To avoid invalidating 'I', back it up to the convert node. 648193323Sed --I; 649193323Sed CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 650193323Sed 651193323Sed // Now that we did that, the node is dead. Increment the iterator to the 652193323Sed // next node to process, then delete N. 653193323Sed ++I; 654193323Sed CurDAG->DeleteNode(N); 655193323Sed } 656193323Sed} 657193323Sed 658193323Sed/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 659193323Sed/// when it has created a SelectionDAG for us to codegen. 660193323Sedvoid X86DAGToDAGISel::InstructionSelect() { 661198090Srdivacky const Function *F = MF->getFunction(); 662193323Sed OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); 663193323Sed 664193323Sed DEBUG(BB->dump()); 665193323Sed if (OptLevel != CodeGenOpt::None) 666193323Sed PreprocessForRMW(); 667193323Sed 668193323Sed // FIXME: This should only happen when not compiled with -O0. 669193323Sed PreprocessForFPConvert(); 670193323Sed 671193323Sed // Codegen the basic block. 672193323Sed#ifndef NDEBUG 673198090Srdivacky DEBUG(errs() << "===== Instruction selection begins:\n"); 674193323Sed Indent = 0; 675193323Sed#endif 676193323Sed SelectRoot(*CurDAG); 677193323Sed#ifndef NDEBUG 678198090Srdivacky DEBUG(errs() << "===== Instruction selection ends:\n"); 679193323Sed#endif 680193323Sed 681193323Sed CurDAG->RemoveDeadNodes(); 682193323Sed} 683193323Sed 684193323Sed/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 685193323Sed/// the main function. 686193323Sedvoid X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 687193323Sed MachineFrameInfo *MFI) { 688193323Sed const TargetInstrInfo *TII = TM.getInstrInfo(); 689193323Sed if (Subtarget->isTargetCygMing()) 690193323Sed BuildMI(BB, DebugLoc::getUnknownLoc(), 691193323Sed TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 692193323Sed} 693193323Sed 694193323Sedvoid X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 695193323Sed // If this is main, emit special code for main. 696193323Sed MachineBasicBlock *BB = MF.begin(); 697193323Sed if (Fn.hasExternalLinkage() && Fn.getName() == "main") 698193323Sed EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 699193323Sed} 700193323Sed 701193323Sed 702193323Sedbool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, 703193323Sed X86ISelAddressMode &AM) { 704193323Sed assert(N.getOpcode() == X86ISD::SegmentBaseAddress); 705193323Sed SDValue Segment = N.getOperand(0); 706193323Sed 707193323Sed if (AM.Segment.getNode() == 0) { 708193323Sed AM.Segment = Segment; 709193323Sed return false; 710193323Sed } 711193323Sed 712193323Sed return true; 713193323Sed} 714193323Sed 715193323Sedbool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { 716193323Sed // This optimization is valid because the GNU TLS model defines that 717193323Sed // gs:0 (or fs:0 on X86-64) contains its own address. 718193323Sed // For more information see http://people.redhat.com/drepper/tls.pdf 719193323Sed 720193323Sed SDValue Address = N.getOperand(1); 721193323Sed if (Address.getOpcode() == X86ISD::SegmentBaseAddress && 722193323Sed !MatchSegmentBaseAddress (Address, AM)) 723193323Sed return false; 724193323Sed 725193323Sed return true; 726193323Sed} 727193323Sed 728195098Sed/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes 729195098Sed/// into an addressing mode. These wrap things that will resolve down into a 730195098Sed/// symbol reference. If no match is possible, this returns true, otherwise it 731198090Srdivacky/// returns false. 732193323Sedbool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { 733195098Sed // If the addressing mode already has a symbol as the displacement, we can 734195098Sed // never match another symbol. 735193323Sed if (AM.hasSymbolicDisplacement()) 736193323Sed return true; 737193323Sed 738193323Sed SDValue N0 = N.getOperand(0); 739198090Srdivacky CodeModel::Model M = TM.getCodeModel(); 740198090Srdivacky 741195098Sed // Handle X86-64 rip-relative addresses. We check this before checking direct 742195098Sed // folding because RIP is preferable to non-RIP accesses. 743195098Sed if (Subtarget->is64Bit() && 744195098Sed // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 745195098Sed // they cannot be folded into immediate fields. 746195098Sed // FIXME: This can be improved for kernel and other models? 747198090Srdivacky (M == CodeModel::Small || M == CodeModel::Kernel) && 748195098Sed // Base and index reg must be 0 in order to use %rip as base and lowering 749195098Sed // must allow RIP. 750195098Sed !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { 751195098Sed if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 752195098Sed int64_t Offset = AM.Disp + G->getOffset(); 753198090Srdivacky if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; 754195098Sed AM.GV = G->getGlobal(); 755195098Sed AM.Disp = Offset; 756195098Sed AM.SymbolFlags = G->getTargetFlags(); 757195098Sed } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 758195098Sed int64_t Offset = AM.Disp + CP->getOffset(); 759198090Srdivacky if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; 760195098Sed AM.CP = CP->getConstVal(); 761195098Sed AM.Align = CP->getAlignment(); 762195098Sed AM.Disp = Offset; 763195098Sed AM.SymbolFlags = CP->getTargetFlags(); 764195098Sed } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 765195098Sed AM.ES = S->getSymbol(); 766195098Sed AM.SymbolFlags = S->getTargetFlags(); 767198892Srdivacky } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 768195098Sed AM.JT = J->getIndex(); 769195098Sed AM.SymbolFlags = J->getTargetFlags(); 770198892Srdivacky } else { 771198892Srdivacky AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); 772198892Srdivacky //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); 773193323Sed } 774198090Srdivacky 775195098Sed if (N.getOpcode() == X86ISD::WrapperRIP) 776195098Sed AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 777195098Sed return false; 778195098Sed } 779195098Sed 780195098Sed // Handle the case when globals fit in our immediate field: This is true for 781195098Sed // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit 782195098Sed // mode, this results in a non-RIP-relative computation. 783195098Sed if (!Subtarget->is64Bit() || 784198090Srdivacky ((M == CodeModel::Small || M == CodeModel::Kernel) && 785195098Sed TM.getRelocationModel() == Reloc::Static)) { 786195098Sed if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 787195098Sed AM.GV = G->getGlobal(); 788195098Sed AM.Disp += G->getOffset(); 789195098Sed AM.SymbolFlags = G->getTargetFlags(); 790195098Sed } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 791193323Sed AM.CP = CP->getConstVal(); 792193323Sed AM.Align = CP->getAlignment(); 793195098Sed AM.Disp += CP->getOffset(); 794195098Sed AM.SymbolFlags = CP->getTargetFlags(); 795195098Sed } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 796195098Sed AM.ES = S->getSymbol(); 797195098Sed AM.SymbolFlags = S->getTargetFlags(); 798198892Srdivacky } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 799195098Sed AM.JT = J->getIndex(); 800195098Sed AM.SymbolFlags = J->getTargetFlags(); 801198892Srdivacky } else { 802198892Srdivacky AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); 803198892Srdivacky //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); 804193323Sed } 805193323Sed return false; 806193323Sed } 807193323Sed 808193323Sed return true; 809193323Sed} 810193323Sed 811193323Sed/// MatchAddress - Add the specified node to the specified addressing mode, 812193323Sed/// returning true if it cannot be done. This just pattern matches for the 813193323Sed/// addressing mode. 814198090Srdivackybool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { 815198090Srdivacky if (MatchAddressRecursively(N, AM, 0)) 816198090Srdivacky return true; 817198090Srdivacky 818198090Srdivacky // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has 819198090Srdivacky // a smaller encoding and avoids a scaled-index. 820198090Srdivacky if (AM.Scale == 2 && 821198090Srdivacky AM.BaseType == X86ISelAddressMode::RegBase && 822198090Srdivacky AM.Base.Reg.getNode() == 0) { 823198090Srdivacky AM.Base.Reg = AM.IndexReg; 824198090Srdivacky AM.Scale = 1; 825198090Srdivacky } 826198090Srdivacky 827198090Srdivacky // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, 828198090Srdivacky // because it has a smaller encoding. 829198090Srdivacky // TODO: Which other code models can use this? 830198090Srdivacky if (TM.getCodeModel() == CodeModel::Small && 831198090Srdivacky Subtarget->is64Bit() && 832198090Srdivacky AM.Scale == 1 && 833198090Srdivacky AM.BaseType == X86ISelAddressMode::RegBase && 834198090Srdivacky AM.Base.Reg.getNode() == 0 && 835198090Srdivacky AM.IndexReg.getNode() == 0 && 836198090Srdivacky AM.SymbolFlags == X86II::MO_NO_FLAG && 837198090Srdivacky AM.hasSymbolicDisplacement()) 838198090Srdivacky AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); 839198090Srdivacky 840198090Srdivacky return false; 841198090Srdivacky} 842198090Srdivacky 843198090Srdivackybool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 844198090Srdivacky unsigned Depth) { 845193323Sed bool is64Bit = Subtarget->is64Bit(); 846193323Sed DebugLoc dl = N.getDebugLoc(); 847198090Srdivacky DEBUG({ 848198090Srdivacky errs() << "MatchAddress: "; 849198090Srdivacky AM.dump(); 850198090Srdivacky }); 851193323Sed // Limit recursion. 852193323Sed if (Depth > 5) 853193323Sed return MatchAddressBase(N, AM); 854198090Srdivacky 855198090Srdivacky CodeModel::Model M = TM.getCodeModel(); 856198090Srdivacky 857195098Sed // If this is already a %rip relative address, we can only merge immediates 858195098Sed // into it. Instead of handling this in every case, we handle it here. 859193323Sed // RIP relative addressing: %rip + 32-bit displacement! 860195098Sed if (AM.isRIPRelative()) { 861195098Sed // FIXME: JumpTable and ExternalSymbol address currently don't like 862195098Sed // displacements. It isn't very important, but this should be fixed for 863195098Sed // consistency. 864195098Sed if (!AM.ES && AM.JT != -1) return true; 865198090Srdivacky 866195098Sed if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { 867195098Sed int64_t Val = AM.Disp + Cst->getSExtValue(); 868198090Srdivacky if (X86::isOffsetSuitableForCodeModel(Val, M, 869198090Srdivacky AM.hasSymbolicDisplacement())) { 870195098Sed AM.Disp = Val; 871193323Sed return false; 872193323Sed } 873193323Sed } 874193323Sed return true; 875193323Sed } 876193323Sed 877193323Sed switch (N.getOpcode()) { 878193323Sed default: break; 879193323Sed case ISD::Constant: { 880193323Sed uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 881198090Srdivacky if (!is64Bit || 882198090Srdivacky X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, 883198090Srdivacky AM.hasSymbolicDisplacement())) { 884193323Sed AM.Disp += Val; 885193323Sed return false; 886193323Sed } 887193323Sed break; 888193323Sed } 889193323Sed 890193323Sed case X86ISD::SegmentBaseAddress: 891193323Sed if (!MatchSegmentBaseAddress(N, AM)) 892193323Sed return false; 893193323Sed break; 894193323Sed 895193323Sed case X86ISD::Wrapper: 896195098Sed case X86ISD::WrapperRIP: 897193323Sed if (!MatchWrapper(N, AM)) 898193323Sed return false; 899193323Sed break; 900193323Sed 901193323Sed case ISD::LOAD: 902193323Sed if (!MatchLoad(N, AM)) 903193323Sed return false; 904193323Sed break; 905193323Sed 906193323Sed case ISD::FrameIndex: 907193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase 908193323Sed && AM.Base.Reg.getNode() == 0) { 909193323Sed AM.BaseType = X86ISelAddressMode::FrameIndexBase; 910193323Sed AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 911193323Sed return false; 912193323Sed } 913193323Sed break; 914193323Sed 915193323Sed case ISD::SHL: 916195098Sed if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) 917193323Sed break; 918193323Sed 919193323Sed if (ConstantSDNode 920193323Sed *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { 921193323Sed unsigned Val = CN->getZExtValue(); 922198090Srdivacky // Note that we handle x<<1 as (,x,2) rather than (x,x) here so 923198090Srdivacky // that the base operand remains free for further matching. If 924198090Srdivacky // the base doesn't end up getting used, a post-processing step 925198090Srdivacky // in MatchAddress turns (,x,2) into (x,x), which is cheaper. 926193323Sed if (Val == 1 || Val == 2 || Val == 3) { 927193323Sed AM.Scale = 1 << Val; 928193323Sed SDValue ShVal = N.getNode()->getOperand(0); 929193323Sed 930193323Sed // Okay, we know that we have a scale by now. However, if the scaled 931193323Sed // value is an add of something and a constant, we can fold the 932193323Sed // constant into the disp field here. 933193323Sed if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 934193323Sed isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { 935193323Sed AM.IndexReg = ShVal.getNode()->getOperand(0); 936193323Sed ConstantSDNode *AddVal = 937193323Sed cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); 938193323Sed uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); 939198090Srdivacky if (!is64Bit || 940198090Srdivacky X86::isOffsetSuitableForCodeModel(Disp, M, 941198090Srdivacky AM.hasSymbolicDisplacement())) 942193323Sed AM.Disp = Disp; 943193323Sed else 944193323Sed AM.IndexReg = ShVal; 945193323Sed } else { 946193323Sed AM.IndexReg = ShVal; 947193323Sed } 948193323Sed return false; 949193323Sed } 950193323Sed break; 951193323Sed } 952193323Sed 953193323Sed case ISD::SMUL_LOHI: 954193323Sed case ISD::UMUL_LOHI: 955193323Sed // A mul_lohi where we need the low part can be folded as a plain multiply. 956193323Sed if (N.getResNo() != 0) break; 957193323Sed // FALL THROUGH 958193323Sed case ISD::MUL: 959193323Sed case X86ISD::MUL_IMM: 960193323Sed // X*[3,5,9] -> X+X*[2,4,8] 961193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase && 962193323Sed AM.Base.Reg.getNode() == 0 && 963195098Sed AM.IndexReg.getNode() == 0) { 964193323Sed if (ConstantSDNode 965193323Sed *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) 966193323Sed if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || 967193323Sed CN->getZExtValue() == 9) { 968193323Sed AM.Scale = unsigned(CN->getZExtValue())-1; 969193323Sed 970193323Sed SDValue MulVal = N.getNode()->getOperand(0); 971193323Sed SDValue Reg; 972193323Sed 973193323Sed // Okay, we know that we have a scale by now. However, if the scaled 974193323Sed // value is an add of something and a constant, we can fold the 975193323Sed // constant into the disp field here. 976193323Sed if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 977193323Sed isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) { 978193323Sed Reg = MulVal.getNode()->getOperand(0); 979193323Sed ConstantSDNode *AddVal = 980193323Sed cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); 981193323Sed uint64_t Disp = AM.Disp + AddVal->getSExtValue() * 982193323Sed CN->getZExtValue(); 983198090Srdivacky if (!is64Bit || 984198090Srdivacky X86::isOffsetSuitableForCodeModel(Disp, M, 985198090Srdivacky AM.hasSymbolicDisplacement())) 986193323Sed AM.Disp = Disp; 987193323Sed else 988193323Sed Reg = N.getNode()->getOperand(0); 989193323Sed } else { 990193323Sed Reg = N.getNode()->getOperand(0); 991193323Sed } 992193323Sed 993193323Sed AM.IndexReg = AM.Base.Reg = Reg; 994193323Sed return false; 995193323Sed } 996193323Sed } 997193323Sed break; 998193323Sed 999193323Sed case ISD::SUB: { 1000193323Sed // Given A-B, if A can be completely folded into the address and 1001193323Sed // the index field with the index field unused, use -B as the index. 1002193323Sed // This is a win if a has multiple parts that can be folded into 1003193323Sed // the address. Also, this saves a mov if the base register has 1004193323Sed // other uses, since it avoids a two-address sub instruction, however 1005193323Sed // it costs an additional mov if the index register has other uses. 1006193323Sed 1007193323Sed // Test if the LHS of the sub can be folded. 1008193323Sed X86ISelAddressMode Backup = AM; 1009198090Srdivacky if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { 1010193323Sed AM = Backup; 1011193323Sed break; 1012193323Sed } 1013193323Sed // Test if the index field is free for use. 1014195098Sed if (AM.IndexReg.getNode() || AM.isRIPRelative()) { 1015193323Sed AM = Backup; 1016193323Sed break; 1017193323Sed } 1018193323Sed int Cost = 0; 1019193323Sed SDValue RHS = N.getNode()->getOperand(1); 1020193323Sed // If the RHS involves a register with multiple uses, this 1021193323Sed // transformation incurs an extra mov, due to the neg instruction 1022193323Sed // clobbering its operand. 1023193323Sed if (!RHS.getNode()->hasOneUse() || 1024193323Sed RHS.getNode()->getOpcode() == ISD::CopyFromReg || 1025193323Sed RHS.getNode()->getOpcode() == ISD::TRUNCATE || 1026193323Sed RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || 1027193323Sed (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && 1028193323Sed RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) 1029193323Sed ++Cost; 1030193323Sed // If the base is a register with multiple uses, this 1031193323Sed // transformation may save a mov. 1032193323Sed if ((AM.BaseType == X86ISelAddressMode::RegBase && 1033193323Sed AM.Base.Reg.getNode() && 1034193323Sed !AM.Base.Reg.getNode()->hasOneUse()) || 1035193323Sed AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1036193323Sed --Cost; 1037193323Sed // If the folded LHS was interesting, this transformation saves 1038193323Sed // address arithmetic. 1039193323Sed if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + 1040193323Sed ((AM.Disp != 0) && (Backup.Disp == 0)) + 1041193323Sed (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) 1042193323Sed --Cost; 1043193323Sed // If it doesn't look like it may be an overall win, don't do it. 1044193323Sed if (Cost >= 0) { 1045193323Sed AM = Backup; 1046193323Sed break; 1047193323Sed } 1048193323Sed 1049193323Sed // Ok, the transformation is legal and appears profitable. Go for it. 1050193323Sed SDValue Zero = CurDAG->getConstant(0, N.getValueType()); 1051193323Sed SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); 1052193323Sed AM.IndexReg = Neg; 1053193323Sed AM.Scale = 1; 1054193323Sed 1055193323Sed // Insert the new nodes into the topological ordering. 1056193323Sed if (Zero.getNode()->getNodeId() == -1 || 1057193323Sed Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1058193323Sed CurDAG->RepositionNode(N.getNode(), Zero.getNode()); 1059193323Sed Zero.getNode()->setNodeId(N.getNode()->getNodeId()); 1060193323Sed } 1061193323Sed if (Neg.getNode()->getNodeId() == -1 || 1062193323Sed Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1063193323Sed CurDAG->RepositionNode(N.getNode(), Neg.getNode()); 1064193323Sed Neg.getNode()->setNodeId(N.getNode()->getNodeId()); 1065193323Sed } 1066193323Sed return false; 1067193323Sed } 1068193323Sed 1069193323Sed case ISD::ADD: { 1070193323Sed X86ISelAddressMode Backup = AM; 1071198090Srdivacky if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && 1072198090Srdivacky !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) 1073193323Sed return false; 1074193323Sed AM = Backup; 1075198090Srdivacky if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && 1076198090Srdivacky !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) 1077193323Sed return false; 1078193323Sed AM = Backup; 1079193323Sed 1080193323Sed // If we couldn't fold both operands into the address at the same time, 1081193323Sed // see if we can just put each operand into a register and fold at least 1082193323Sed // the add. 1083193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase && 1084193323Sed !AM.Base.Reg.getNode() && 1085195098Sed !AM.IndexReg.getNode()) { 1086193323Sed AM.Base.Reg = N.getNode()->getOperand(0); 1087193323Sed AM.IndexReg = N.getNode()->getOperand(1); 1088193323Sed AM.Scale = 1; 1089193323Sed return false; 1090193323Sed } 1091193323Sed break; 1092193323Sed } 1093193323Sed 1094193323Sed case ISD::OR: 1095193323Sed // Handle "X | C" as "X + C" iff X is known to have C bits clear. 1096193323Sed if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1097193323Sed X86ISelAddressMode Backup = AM; 1098193323Sed uint64_t Offset = CN->getSExtValue(); 1099193323Sed // Start with the LHS as an addr mode. 1100198090Srdivacky if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && 1101193323Sed // Address could not have picked a GV address for the displacement. 1102193323Sed AM.GV == NULL && 1103193323Sed // On x86-64, the resultant disp must fit in 32-bits. 1104198090Srdivacky (!is64Bit || 1105198090Srdivacky X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, 1106198090Srdivacky AM.hasSymbolicDisplacement())) && 1107193323Sed // Check to see if the LHS & C is zero. 1108193323Sed CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { 1109193323Sed AM.Disp += Offset; 1110193323Sed return false; 1111193323Sed } 1112193323Sed AM = Backup; 1113193323Sed } 1114193323Sed break; 1115193323Sed 1116193323Sed case ISD::AND: { 1117193323Sed // Perform some heroic transforms on an and of a constant-count shift 1118193323Sed // with a constant to enable use of the scaled offset field. 1119193323Sed 1120193323Sed SDValue Shift = N.getOperand(0); 1121193323Sed if (Shift.getNumOperands() != 2) break; 1122193323Sed 1123193323Sed // Scale must not be used already. 1124193323Sed if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; 1125193323Sed 1126193323Sed SDValue X = Shift.getOperand(0); 1127193323Sed ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1128193323Sed ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 1129193323Sed if (!C1 || !C2) break; 1130193323Sed 1131193323Sed // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This 1132193323Sed // allows us to convert the shift and and into an h-register extract and 1133193323Sed // a scaled index. 1134193323Sed if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { 1135193323Sed unsigned ScaleLog = 8 - C1->getZExtValue(); 1136193323Sed if (ScaleLog > 0 && ScaleLog < 4 && 1137193323Sed C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { 1138193323Sed SDValue Eight = CurDAG->getConstant(8, MVT::i8); 1139193323Sed SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); 1140193323Sed SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1141193323Sed X, Eight); 1142193323Sed SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), 1143193323Sed Srl, Mask); 1144193323Sed SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); 1145193323Sed SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1146193323Sed And, ShlCount); 1147193323Sed 1148193323Sed // Insert the new nodes into the topological ordering. 1149193323Sed if (Eight.getNode()->getNodeId() == -1 || 1150193323Sed Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1151193323Sed CurDAG->RepositionNode(X.getNode(), Eight.getNode()); 1152193323Sed Eight.getNode()->setNodeId(X.getNode()->getNodeId()); 1153193323Sed } 1154193323Sed if (Mask.getNode()->getNodeId() == -1 || 1155193323Sed Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1156193323Sed CurDAG->RepositionNode(X.getNode(), Mask.getNode()); 1157193323Sed Mask.getNode()->setNodeId(X.getNode()->getNodeId()); 1158193323Sed } 1159193323Sed if (Srl.getNode()->getNodeId() == -1 || 1160193323Sed Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1161193323Sed CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); 1162193323Sed Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1163193323Sed } 1164193323Sed if (And.getNode()->getNodeId() == -1 || 1165193323Sed And.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1166193323Sed CurDAG->RepositionNode(N.getNode(), And.getNode()); 1167193323Sed And.getNode()->setNodeId(N.getNode()->getNodeId()); 1168193323Sed } 1169193323Sed if (ShlCount.getNode()->getNodeId() == -1 || 1170193323Sed ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1171193323Sed CurDAG->RepositionNode(X.getNode(), ShlCount.getNode()); 1172193323Sed ShlCount.getNode()->setNodeId(N.getNode()->getNodeId()); 1173193323Sed } 1174193323Sed if (Shl.getNode()->getNodeId() == -1 || 1175193323Sed Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1176193323Sed CurDAG->RepositionNode(N.getNode(), Shl.getNode()); 1177193323Sed Shl.getNode()->setNodeId(N.getNode()->getNodeId()); 1178193323Sed } 1179193323Sed CurDAG->ReplaceAllUsesWith(N, Shl); 1180193323Sed AM.IndexReg = And; 1181193323Sed AM.Scale = (1 << ScaleLog); 1182193323Sed return false; 1183193323Sed } 1184193323Sed } 1185193323Sed 1186193323Sed // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this 1187193323Sed // allows us to fold the shift into this addressing mode. 1188193323Sed if (Shift.getOpcode() != ISD::SHL) break; 1189193323Sed 1190193323Sed // Not likely to be profitable if either the AND or SHIFT node has more 1191193323Sed // than one use (unless all uses are for address computation). Besides, 1192193323Sed // isel mechanism requires their node ids to be reused. 1193193323Sed if (!N.hasOneUse() || !Shift.hasOneUse()) 1194193323Sed break; 1195193323Sed 1196193323Sed // Verify that the shift amount is something we can fold. 1197193323Sed unsigned ShiftCst = C1->getZExtValue(); 1198193323Sed if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) 1199193323Sed break; 1200193323Sed 1201193323Sed // Get the new AND mask, this folds to a constant. 1202193323Sed SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1203193323Sed SDValue(C2, 0), SDValue(C1, 0)); 1204193323Sed SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, 1205193323Sed NewANDMask); 1206193323Sed SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1207193323Sed NewAND, SDValue(C1, 0)); 1208193323Sed 1209193323Sed // Insert the new nodes into the topological ordering. 1210193323Sed if (C1->getNodeId() > X.getNode()->getNodeId()) { 1211193323Sed CurDAG->RepositionNode(X.getNode(), C1); 1212193323Sed C1->setNodeId(X.getNode()->getNodeId()); 1213193323Sed } 1214193323Sed if (NewANDMask.getNode()->getNodeId() == -1 || 1215193323Sed NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1216193323Sed CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode()); 1217193323Sed NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId()); 1218193323Sed } 1219193323Sed if (NewAND.getNode()->getNodeId() == -1 || 1220193323Sed NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1221193323Sed CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode()); 1222193323Sed NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1223193323Sed } 1224193323Sed if (NewSHIFT.getNode()->getNodeId() == -1 || 1225193323Sed NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1226193323Sed CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode()); 1227193323Sed NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); 1228193323Sed } 1229193323Sed 1230193323Sed CurDAG->ReplaceAllUsesWith(N, NewSHIFT); 1231193323Sed 1232193323Sed AM.Scale = 1 << ShiftCst; 1233193323Sed AM.IndexReg = NewAND; 1234193323Sed return false; 1235193323Sed } 1236193323Sed } 1237193323Sed 1238193323Sed return MatchAddressBase(N, AM); 1239193323Sed} 1240193323Sed 1241193323Sed/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the 1242193323Sed/// specified addressing mode without any further recursion. 1243193323Sedbool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { 1244193323Sed // Is the base register already occupied? 1245193323Sed if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { 1246193323Sed // If so, check to see if the scale index register is set. 1247195098Sed if (AM.IndexReg.getNode() == 0) { 1248193323Sed AM.IndexReg = N; 1249193323Sed AM.Scale = 1; 1250193323Sed return false; 1251193323Sed } 1252193323Sed 1253193323Sed // Otherwise, we cannot select it. 1254193323Sed return true; 1255193323Sed } 1256193323Sed 1257193323Sed // Default, generate it as a register. 1258193323Sed AM.BaseType = X86ISelAddressMode::RegBase; 1259193323Sed AM.Base.Reg = N; 1260193323Sed return false; 1261193323Sed} 1262193323Sed 1263193323Sed/// SelectAddr - returns true if it is able pattern match an addressing mode. 1264193323Sed/// It returns the operands which make up the maximal addressing mode it can 1265193323Sed/// match by reference. 1266193323Sedbool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, 1267193323Sed SDValue &Scale, SDValue &Index, 1268193323Sed SDValue &Disp, SDValue &Segment) { 1269193323Sed X86ISelAddressMode AM; 1270193323Sed bool Done = false; 1271193323Sed if (AvoidDupAddrCompute && !N.hasOneUse()) { 1272193323Sed unsigned Opcode = N.getOpcode(); 1273193323Sed if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex && 1274195098Sed Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { 1275193323Sed // If we are able to fold N into addressing mode, then we'll allow it even 1276193323Sed // if N has multiple uses. In general, addressing computation is used as 1277193323Sed // addresses by all of its uses. But watch out for CopyToReg uses, that 1278193323Sed // means the address computation is liveout. It will be computed by a LEA 1279193323Sed // so we want to avoid computing the address twice. 1280193323Sed for (SDNode::use_iterator UI = N.getNode()->use_begin(), 1281193323Sed UE = N.getNode()->use_end(); UI != UE; ++UI) { 1282193323Sed if (UI->getOpcode() == ISD::CopyToReg) { 1283193323Sed MatchAddressBase(N, AM); 1284193323Sed Done = true; 1285193323Sed break; 1286193323Sed } 1287193323Sed } 1288193323Sed } 1289193323Sed } 1290193323Sed 1291193323Sed if (!Done && MatchAddress(N, AM)) 1292193323Sed return false; 1293193323Sed 1294198090Srdivacky EVT VT = N.getValueType(); 1295193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase) { 1296193323Sed if (!AM.Base.Reg.getNode()) 1297193323Sed AM.Base.Reg = CurDAG->getRegister(0, VT); 1298193323Sed } 1299193323Sed 1300193323Sed if (!AM.IndexReg.getNode()) 1301193323Sed AM.IndexReg = CurDAG->getRegister(0, VT); 1302193323Sed 1303193323Sed getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1304193323Sed return true; 1305193323Sed} 1306193323Sed 1307193323Sed/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 1308193323Sed/// match a load whose top elements are either undef or zeros. The load flavor 1309193323Sed/// is derived from the type of N, which is either v4f32 or v2f64. 1310193323Sedbool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, 1311193323Sed SDValue N, SDValue &Base, 1312193323Sed SDValue &Scale, SDValue &Index, 1313193323Sed SDValue &Disp, SDValue &Segment, 1314193323Sed SDValue &InChain, 1315193323Sed SDValue &OutChain) { 1316193323Sed if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1317193323Sed InChain = N.getOperand(0).getValue(1); 1318193323Sed if (ISD::isNON_EXTLoad(InChain.getNode()) && 1319193323Sed InChain.getValue(0).hasOneUse() && 1320193323Sed N.hasOneUse() && 1321193323Sed IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) { 1322193323Sed LoadSDNode *LD = cast<LoadSDNode>(InChain); 1323193323Sed if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1324193323Sed return false; 1325193323Sed OutChain = LD->getChain(); 1326193323Sed return true; 1327193323Sed } 1328193323Sed } 1329193323Sed 1330193323Sed // Also handle the case where we explicitly require zeros in the top 1331193323Sed // elements. This is a vector shuffle from the zero vector. 1332193323Sed if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1333193323Sed // Check to see if the top elements are all zeros (or bitcast of zeros). 1334193323Sed N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1335193323Sed N.getOperand(0).getNode()->hasOneUse() && 1336193323Sed ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1337193323Sed N.getOperand(0).getOperand(0).hasOneUse()) { 1338193323Sed // Okay, this is a zero extending load. Fold it. 1339193323Sed LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1340193323Sed if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1341193323Sed return false; 1342193323Sed OutChain = LD->getChain(); 1343193323Sed InChain = SDValue(LD, 1); 1344193323Sed return true; 1345193323Sed } 1346193323Sed return false; 1347193323Sed} 1348193323Sed 1349193323Sed 1350193323Sed/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1351193323Sed/// mode it matches can be cost effectively emitted as an LEA instruction. 1352193323Sedbool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, 1353193323Sed SDValue &Base, SDValue &Scale, 1354193323Sed SDValue &Index, SDValue &Disp) { 1355193323Sed X86ISelAddressMode AM; 1356193323Sed 1357193323Sed // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support 1358193323Sed // segments. 1359193323Sed SDValue Copy = AM.Segment; 1360193323Sed SDValue T = CurDAG->getRegister(0, MVT::i32); 1361193323Sed AM.Segment = T; 1362193323Sed if (MatchAddress(N, AM)) 1363193323Sed return false; 1364193323Sed assert (T == AM.Segment); 1365193323Sed AM.Segment = Copy; 1366193323Sed 1367198090Srdivacky EVT VT = N.getValueType(); 1368193323Sed unsigned Complexity = 0; 1369193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase) 1370193323Sed if (AM.Base.Reg.getNode()) 1371193323Sed Complexity = 1; 1372193323Sed else 1373193323Sed AM.Base.Reg = CurDAG->getRegister(0, VT); 1374193323Sed else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1375193323Sed Complexity = 4; 1376193323Sed 1377193323Sed if (AM.IndexReg.getNode()) 1378193323Sed Complexity++; 1379193323Sed else 1380193323Sed AM.IndexReg = CurDAG->getRegister(0, VT); 1381193323Sed 1382193323Sed // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1383193323Sed // a simple shift. 1384193323Sed if (AM.Scale > 1) 1385193323Sed Complexity++; 1386193323Sed 1387193323Sed // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1388193323Sed // to a LEA. This is determined with some expermentation but is by no means 1389193323Sed // optimal (especially for code size consideration). LEA is nice because of 1390193323Sed // its three-address nature. Tweak the cost function again when we can run 1391193323Sed // convertToThreeAddress() at register allocation time. 1392193323Sed if (AM.hasSymbolicDisplacement()) { 1393193323Sed // For X86-64, we should always use lea to materialize RIP relative 1394193323Sed // addresses. 1395193323Sed if (Subtarget->is64Bit()) 1396193323Sed Complexity = 4; 1397193323Sed else 1398193323Sed Complexity += 2; 1399193323Sed } 1400193323Sed 1401193323Sed if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) 1402193323Sed Complexity++; 1403193323Sed 1404198090Srdivacky // If it isn't worth using an LEA, reject it. 1405198090Srdivacky if (Complexity <= 2) 1406198090Srdivacky return false; 1407198090Srdivacky 1408198090Srdivacky SDValue Segment; 1409198090Srdivacky getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1410198090Srdivacky return true; 1411193323Sed} 1412193323Sed 1413194612Sed/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 1414194612Sedbool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 1415194612Sed SDValue &Scale, SDValue &Index, 1416194612Sed SDValue &Disp) { 1417194612Sed assert(Op.getOpcode() == X86ISD::TLSADDR); 1418194612Sed assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1419194612Sed const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1420194612Sed 1421194612Sed X86ISelAddressMode AM; 1422194612Sed AM.GV = GA->getGlobal(); 1423194612Sed AM.Disp += GA->getOffset(); 1424194612Sed AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); 1425195098Sed AM.SymbolFlags = GA->getTargetFlags(); 1426195098Sed 1427194612Sed if (N.getValueType() == MVT::i32) { 1428194612Sed AM.Scale = 1; 1429194612Sed AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1430194612Sed } else { 1431194612Sed AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1432194612Sed } 1433194612Sed 1434194612Sed SDValue Segment; 1435194612Sed getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1436194612Sed return true; 1437194612Sed} 1438194612Sed 1439194612Sed 1440193323Sedbool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, 1441193323Sed SDValue &Base, SDValue &Scale, 1442193323Sed SDValue &Index, SDValue &Disp, 1443193323Sed SDValue &Segment) { 1444193323Sed if (ISD::isNON_EXTLoad(N.getNode()) && 1445193323Sed N.hasOneUse() && 1446193323Sed IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) 1447193323Sed return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); 1448193323Sed return false; 1449193323Sed} 1450193323Sed 1451193323Sed/// getGlobalBaseReg - Return an SDNode that returns the value of 1452193323Sed/// the global base register. Output instructions required to 1453193323Sed/// initialize the global base register, if necessary. 1454193323Sed/// 1455193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1456193399Sed unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 1457193323Sed return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); 1458193323Sed} 1459193323Sed 1460193323Sedstatic SDNode *FindCallStartFromCall(SDNode *Node) { 1461193323Sed if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; 1462193323Sed assert(Node->getOperand(0).getValueType() == MVT::Other && 1463193323Sed "Node doesn't have a token chain argument!"); 1464193323Sed return FindCallStartFromCall(Node->getOperand(0).getNode()); 1465193323Sed} 1466193323Sed 1467193323SedSDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { 1468193323Sed SDValue Chain = Node->getOperand(0); 1469193323Sed SDValue In1 = Node->getOperand(1); 1470193323Sed SDValue In2L = Node->getOperand(2); 1471193323Sed SDValue In2H = Node->getOperand(3); 1472193323Sed SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1473193323Sed if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1474193323Sed return NULL; 1475198090Srdivacky MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1476198090Srdivacky MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1477198090Srdivacky const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; 1478198090Srdivacky SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), 1479198090Srdivacky MVT::i32, MVT::i32, MVT::Other, Ops, 1480198090Srdivacky array_lengthof(Ops)); 1481198090Srdivacky cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); 1482198090Srdivacky return ResNode; 1483193323Sed} 1484193323Sed 1485198090SrdivackySDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { 1486198090Srdivacky if (Node->hasAnyUseOfValue(0)) 1487198090Srdivacky return 0; 1488198090Srdivacky 1489198090Srdivacky // Optimize common patterns for __sync_add_and_fetch and 1490198090Srdivacky // __sync_sub_and_fetch where the result is not used. This allows us 1491198090Srdivacky // to use "lock" version of add, sub, inc, dec instructions. 1492198090Srdivacky // FIXME: Do not use special instructions but instead add the "lock" 1493198090Srdivacky // prefix to the target node somehow. The extra information will then be 1494198090Srdivacky // transferred to machine instruction and it denotes the prefix. 1495198090Srdivacky SDValue Chain = Node->getOperand(0); 1496198090Srdivacky SDValue Ptr = Node->getOperand(1); 1497198090Srdivacky SDValue Val = Node->getOperand(2); 1498198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1499198090Srdivacky if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1500198090Srdivacky return 0; 1501198090Srdivacky 1502198090Srdivacky bool isInc = false, isDec = false, isSub = false, isCN = false; 1503198090Srdivacky ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); 1504198090Srdivacky if (CN) { 1505198090Srdivacky isCN = true; 1506198090Srdivacky int64_t CNVal = CN->getSExtValue(); 1507198090Srdivacky if (CNVal == 1) 1508198090Srdivacky isInc = true; 1509198090Srdivacky else if (CNVal == -1) 1510198090Srdivacky isDec = true; 1511198090Srdivacky else if (CNVal >= 0) 1512198090Srdivacky Val = CurDAG->getTargetConstant(CNVal, NVT); 1513198090Srdivacky else { 1514198090Srdivacky isSub = true; 1515198090Srdivacky Val = CurDAG->getTargetConstant(-CNVal, NVT); 1516198090Srdivacky } 1517198090Srdivacky } else if (Val.hasOneUse() && 1518198090Srdivacky Val.getOpcode() == ISD::SUB && 1519198090Srdivacky X86::isZeroNode(Val.getOperand(0))) { 1520198090Srdivacky isSub = true; 1521198090Srdivacky Val = Val.getOperand(1); 1522198090Srdivacky } 1523198090Srdivacky 1524198090Srdivacky unsigned Opc = 0; 1525198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1526198090Srdivacky default: return 0; 1527198090Srdivacky case MVT::i8: 1528198090Srdivacky if (isInc) 1529198090Srdivacky Opc = X86::LOCK_INC8m; 1530198090Srdivacky else if (isDec) 1531198090Srdivacky Opc = X86::LOCK_DEC8m; 1532198090Srdivacky else if (isSub) { 1533198090Srdivacky if (isCN) 1534198090Srdivacky Opc = X86::LOCK_SUB8mi; 1535198090Srdivacky else 1536198090Srdivacky Opc = X86::LOCK_SUB8mr; 1537198090Srdivacky } else { 1538198090Srdivacky if (isCN) 1539198090Srdivacky Opc = X86::LOCK_ADD8mi; 1540198090Srdivacky else 1541198090Srdivacky Opc = X86::LOCK_ADD8mr; 1542198090Srdivacky } 1543198090Srdivacky break; 1544198090Srdivacky case MVT::i16: 1545198090Srdivacky if (isInc) 1546198090Srdivacky Opc = X86::LOCK_INC16m; 1547198090Srdivacky else if (isDec) 1548198090Srdivacky Opc = X86::LOCK_DEC16m; 1549198090Srdivacky else if (isSub) { 1550198090Srdivacky if (isCN) { 1551198090Srdivacky if (Predicate_i16immSExt8(Val.getNode())) 1552198090Srdivacky Opc = X86::LOCK_SUB16mi8; 1553198090Srdivacky else 1554198090Srdivacky Opc = X86::LOCK_SUB16mi; 1555198090Srdivacky } else 1556198090Srdivacky Opc = X86::LOCK_SUB16mr; 1557198090Srdivacky } else { 1558198090Srdivacky if (isCN) { 1559198090Srdivacky if (Predicate_i16immSExt8(Val.getNode())) 1560198090Srdivacky Opc = X86::LOCK_ADD16mi8; 1561198090Srdivacky else 1562198090Srdivacky Opc = X86::LOCK_ADD16mi; 1563198090Srdivacky } else 1564198090Srdivacky Opc = X86::LOCK_ADD16mr; 1565198090Srdivacky } 1566198090Srdivacky break; 1567198090Srdivacky case MVT::i32: 1568198090Srdivacky if (isInc) 1569198090Srdivacky Opc = X86::LOCK_INC32m; 1570198090Srdivacky else if (isDec) 1571198090Srdivacky Opc = X86::LOCK_DEC32m; 1572198090Srdivacky else if (isSub) { 1573198090Srdivacky if (isCN) { 1574198090Srdivacky if (Predicate_i32immSExt8(Val.getNode())) 1575198090Srdivacky Opc = X86::LOCK_SUB32mi8; 1576198090Srdivacky else 1577198090Srdivacky Opc = X86::LOCK_SUB32mi; 1578198090Srdivacky } else 1579198090Srdivacky Opc = X86::LOCK_SUB32mr; 1580198090Srdivacky } else { 1581198090Srdivacky if (isCN) { 1582198090Srdivacky if (Predicate_i32immSExt8(Val.getNode())) 1583198090Srdivacky Opc = X86::LOCK_ADD32mi8; 1584198090Srdivacky else 1585198090Srdivacky Opc = X86::LOCK_ADD32mi; 1586198090Srdivacky } else 1587198090Srdivacky Opc = X86::LOCK_ADD32mr; 1588198090Srdivacky } 1589198090Srdivacky break; 1590198090Srdivacky case MVT::i64: 1591198090Srdivacky if (isInc) 1592198090Srdivacky Opc = X86::LOCK_INC64m; 1593198090Srdivacky else if (isDec) 1594198090Srdivacky Opc = X86::LOCK_DEC64m; 1595198090Srdivacky else if (isSub) { 1596198090Srdivacky Opc = X86::LOCK_SUB64mr; 1597198090Srdivacky if (isCN) { 1598198090Srdivacky if (Predicate_i64immSExt8(Val.getNode())) 1599198090Srdivacky Opc = X86::LOCK_SUB64mi8; 1600198090Srdivacky else if (Predicate_i64immSExt32(Val.getNode())) 1601198090Srdivacky Opc = X86::LOCK_SUB64mi32; 1602198090Srdivacky } 1603198090Srdivacky } else { 1604198090Srdivacky Opc = X86::LOCK_ADD64mr; 1605198090Srdivacky if (isCN) { 1606198090Srdivacky if (Predicate_i64immSExt8(Val.getNode())) 1607198090Srdivacky Opc = X86::LOCK_ADD64mi8; 1608198090Srdivacky else if (Predicate_i64immSExt32(Val.getNode())) 1609198090Srdivacky Opc = X86::LOCK_ADD64mi32; 1610198090Srdivacky } 1611198090Srdivacky } 1612198090Srdivacky break; 1613198090Srdivacky } 1614198090Srdivacky 1615198090Srdivacky DebugLoc dl = Node->getDebugLoc(); 1616198090Srdivacky SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, 1617198090Srdivacky dl, NVT), 0); 1618198090Srdivacky MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1619198090Srdivacky MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1620198090Srdivacky if (isInc || isDec) { 1621198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; 1622198090Srdivacky SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); 1623198090Srdivacky cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1624198090Srdivacky SDValue RetVals[] = { Undef, Ret }; 1625198090Srdivacky return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1626198090Srdivacky } else { 1627198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; 1628198090Srdivacky SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); 1629198090Srdivacky cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1630198090Srdivacky SDValue RetVals[] = { Undef, Ret }; 1631198090Srdivacky return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1632198090Srdivacky } 1633198090Srdivacky} 1634198090Srdivacky 1635198090Srdivacky/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has 1636198090Srdivacky/// any uses which require the SF or OF bits to be accurate. 1637198090Srdivackystatic bool HasNoSignedComparisonUses(SDNode *N) { 1638198090Srdivacky // Examine each user of the node. 1639198090Srdivacky for (SDNode::use_iterator UI = N->use_begin(), 1640198090Srdivacky UE = N->use_end(); UI != UE; ++UI) { 1641198090Srdivacky // Only examine CopyToReg uses. 1642198090Srdivacky if (UI->getOpcode() != ISD::CopyToReg) 1643198090Srdivacky return false; 1644198090Srdivacky // Only examine CopyToReg uses that copy to EFLAGS. 1645198090Srdivacky if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != 1646198090Srdivacky X86::EFLAGS) 1647198090Srdivacky return false; 1648198090Srdivacky // Examine each user of the CopyToReg use. 1649198090Srdivacky for (SDNode::use_iterator FlagUI = UI->use_begin(), 1650198090Srdivacky FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { 1651198090Srdivacky // Only examine the Flag result. 1652198090Srdivacky if (FlagUI.getUse().getResNo() != 1) continue; 1653198090Srdivacky // Anything unusual: assume conservatively. 1654198090Srdivacky if (!FlagUI->isMachineOpcode()) return false; 1655198090Srdivacky // Examine the opcode of the user. 1656198090Srdivacky switch (FlagUI->getMachineOpcode()) { 1657198090Srdivacky // These comparisons don't treat the most significant bit specially. 1658198090Srdivacky case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: 1659198090Srdivacky case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: 1660198090Srdivacky case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: 1661198090Srdivacky case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: 1662198090Srdivacky case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: 1663198090Srdivacky case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: 1664198090Srdivacky case X86::CMOVA16rr: case X86::CMOVA16rm: 1665198090Srdivacky case X86::CMOVA32rr: case X86::CMOVA32rm: 1666198090Srdivacky case X86::CMOVA64rr: case X86::CMOVA64rm: 1667198090Srdivacky case X86::CMOVAE16rr: case X86::CMOVAE16rm: 1668198090Srdivacky case X86::CMOVAE32rr: case X86::CMOVAE32rm: 1669198090Srdivacky case X86::CMOVAE64rr: case X86::CMOVAE64rm: 1670198090Srdivacky case X86::CMOVB16rr: case X86::CMOVB16rm: 1671198090Srdivacky case X86::CMOVB32rr: case X86::CMOVB32rm: 1672198090Srdivacky case X86::CMOVB64rr: case X86::CMOVB64rm: 1673198090Srdivacky case X86::CMOVBE16rr: case X86::CMOVBE16rm: 1674198090Srdivacky case X86::CMOVBE32rr: case X86::CMOVBE32rm: 1675198090Srdivacky case X86::CMOVBE64rr: case X86::CMOVBE64rm: 1676198090Srdivacky case X86::CMOVE16rr: case X86::CMOVE16rm: 1677198090Srdivacky case X86::CMOVE32rr: case X86::CMOVE32rm: 1678198090Srdivacky case X86::CMOVE64rr: case X86::CMOVE64rm: 1679198090Srdivacky case X86::CMOVNE16rr: case X86::CMOVNE16rm: 1680198090Srdivacky case X86::CMOVNE32rr: case X86::CMOVNE32rm: 1681198090Srdivacky case X86::CMOVNE64rr: case X86::CMOVNE64rm: 1682198090Srdivacky case X86::CMOVNP16rr: case X86::CMOVNP16rm: 1683198090Srdivacky case X86::CMOVNP32rr: case X86::CMOVNP32rm: 1684198090Srdivacky case X86::CMOVNP64rr: case X86::CMOVNP64rm: 1685198090Srdivacky case X86::CMOVP16rr: case X86::CMOVP16rm: 1686198090Srdivacky case X86::CMOVP32rr: case X86::CMOVP32rm: 1687198090Srdivacky case X86::CMOVP64rr: case X86::CMOVP64rm: 1688198090Srdivacky continue; 1689198090Srdivacky // Anything else: assume conservatively. 1690198090Srdivacky default: return false; 1691198090Srdivacky } 1692198090Srdivacky } 1693198090Srdivacky } 1694198090Srdivacky return true; 1695198090Srdivacky} 1696198090Srdivacky 1697193323SedSDNode *X86DAGToDAGISel::Select(SDValue N) { 1698193323Sed SDNode *Node = N.getNode(); 1699198090Srdivacky EVT NVT = Node->getValueType(0); 1700193323Sed unsigned Opc, MOpc; 1701193323Sed unsigned Opcode = Node->getOpcode(); 1702193323Sed DebugLoc dl = Node->getDebugLoc(); 1703193323Sed 1704193323Sed#ifndef NDEBUG 1705198090Srdivacky DEBUG({ 1706198090Srdivacky errs() << std::string(Indent, ' ') << "Selecting: "; 1707198090Srdivacky Node->dump(CurDAG); 1708198090Srdivacky errs() << '\n'; 1709198090Srdivacky }); 1710193323Sed Indent += 2; 1711193323Sed#endif 1712193323Sed 1713193323Sed if (Node->isMachineOpcode()) { 1714193323Sed#ifndef NDEBUG 1715198090Srdivacky DEBUG({ 1716198090Srdivacky errs() << std::string(Indent-2, ' ') << "== "; 1717198090Srdivacky Node->dump(CurDAG); 1718198090Srdivacky errs() << '\n'; 1719198090Srdivacky }); 1720193323Sed Indent -= 2; 1721193323Sed#endif 1722193323Sed return NULL; // Already selected. 1723193323Sed } 1724193323Sed 1725193323Sed switch (Opcode) { 1726198090Srdivacky default: break; 1727198090Srdivacky case X86ISD::GlobalBaseReg: 1728198090Srdivacky return getGlobalBaseReg(); 1729193323Sed 1730198090Srdivacky case X86ISD::ATOMOR64_DAG: 1731198090Srdivacky return SelectAtomic64(Node, X86::ATOMOR6432); 1732198090Srdivacky case X86ISD::ATOMXOR64_DAG: 1733198090Srdivacky return SelectAtomic64(Node, X86::ATOMXOR6432); 1734198090Srdivacky case X86ISD::ATOMADD64_DAG: 1735198090Srdivacky return SelectAtomic64(Node, X86::ATOMADD6432); 1736198090Srdivacky case X86ISD::ATOMSUB64_DAG: 1737198090Srdivacky return SelectAtomic64(Node, X86::ATOMSUB6432); 1738198090Srdivacky case X86ISD::ATOMNAND64_DAG: 1739198090Srdivacky return SelectAtomic64(Node, X86::ATOMNAND6432); 1740198090Srdivacky case X86ISD::ATOMAND64_DAG: 1741198090Srdivacky return SelectAtomic64(Node, X86::ATOMAND6432); 1742198090Srdivacky case X86ISD::ATOMSWAP64_DAG: 1743198090Srdivacky return SelectAtomic64(Node, X86::ATOMSWAP6432); 1744193323Sed 1745198090Srdivacky case ISD::ATOMIC_LOAD_ADD: { 1746198090Srdivacky SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); 1747198090Srdivacky if (RetVal) 1748198090Srdivacky return RetVal; 1749198090Srdivacky break; 1750198090Srdivacky } 1751193323Sed 1752198090Srdivacky case ISD::SMUL_LOHI: 1753198090Srdivacky case ISD::UMUL_LOHI: { 1754198090Srdivacky SDValue N0 = Node->getOperand(0); 1755198090Srdivacky SDValue N1 = Node->getOperand(1); 1756193323Sed 1757198090Srdivacky bool isSigned = Opcode == ISD::SMUL_LOHI; 1758198090Srdivacky if (!isSigned) { 1759198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1760198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 1761198090Srdivacky case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 1762198090Srdivacky case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 1763198090Srdivacky case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; 1764198090Srdivacky case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; 1765193323Sed } 1766198090Srdivacky } else { 1767198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1768198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 1769198090Srdivacky case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 1770198090Srdivacky case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 1771198090Srdivacky case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 1772198090Srdivacky case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 1773193323Sed } 1774198090Srdivacky } 1775193323Sed 1776198090Srdivacky unsigned LoReg, HiReg; 1777198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1778198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 1779198090Srdivacky case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; 1780198090Srdivacky case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; 1781198090Srdivacky case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; 1782198090Srdivacky case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; 1783198090Srdivacky } 1784193323Sed 1785198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1786198090Srdivacky bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1787198090Srdivacky // Multiply is commmutative. 1788198090Srdivacky if (!foldedLoad) { 1789198090Srdivacky foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1790198090Srdivacky if (foldedLoad) 1791198090Srdivacky std::swap(N0, N1); 1792198090Srdivacky } 1793193323Sed 1794198090Srdivacky SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 1795198090Srdivacky N0, SDValue()).getValue(1); 1796198090Srdivacky 1797198090Srdivacky if (foldedLoad) { 1798198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1799198090Srdivacky InFlag }; 1800198090Srdivacky SDNode *CNode = 1801198090Srdivacky CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1802198090Srdivacky array_lengthof(Ops)); 1803198090Srdivacky InFlag = SDValue(CNode, 1); 1804198090Srdivacky // Update the chain. 1805198090Srdivacky ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1806198090Srdivacky } else { 1807198090Srdivacky InFlag = 1808198090Srdivacky SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1809198090Srdivacky } 1810198090Srdivacky 1811198090Srdivacky // Copy the low half of the result, if it is needed. 1812198090Srdivacky if (!N.getValue(0).use_empty()) { 1813198090Srdivacky SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1814198090Srdivacky LoReg, NVT, InFlag); 1815198090Srdivacky InFlag = Result.getValue(2); 1816198090Srdivacky ReplaceUses(N.getValue(0), Result); 1817193323Sed#ifndef NDEBUG 1818198090Srdivacky DEBUG({ 1819198090Srdivacky errs() << std::string(Indent-2, ' ') << "=> "; 1820198090Srdivacky Result.getNode()->dump(CurDAG); 1821198090Srdivacky errs() << '\n'; 1822198090Srdivacky }); 1823193323Sed#endif 1824198090Srdivacky } 1825198090Srdivacky // Copy the high half of the result, if it is needed. 1826198090Srdivacky if (!N.getValue(1).use_empty()) { 1827198090Srdivacky SDValue Result; 1828198090Srdivacky if (HiReg == X86::AH && Subtarget->is64Bit()) { 1829198090Srdivacky // Prevent use of AH in a REX instruction by referencing AX instead. 1830198090Srdivacky // Shift it down 8 bits. 1831198090Srdivacky Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1832198090Srdivacky X86::AX, MVT::i16, InFlag); 1833198090Srdivacky InFlag = Result.getValue(2); 1834198090Srdivacky Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 1835198090Srdivacky Result, 1836198090Srdivacky CurDAG->getTargetConstant(8, MVT::i8)), 0); 1837198090Srdivacky // Then truncate it down to i8. 1838198090Srdivacky Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, 1839198090Srdivacky MVT::i8, Result); 1840198090Srdivacky } else { 1841198090Srdivacky Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1842198090Srdivacky HiReg, NVT, InFlag); 1843198090Srdivacky InFlag = Result.getValue(2); 1844193323Sed } 1845198090Srdivacky ReplaceUses(N.getValue(1), Result); 1846193323Sed#ifndef NDEBUG 1847198090Srdivacky DEBUG({ 1848198090Srdivacky errs() << std::string(Indent-2, ' ') << "=> "; 1849198090Srdivacky Result.getNode()->dump(CurDAG); 1850198090Srdivacky errs() << '\n'; 1851198090Srdivacky }); 1852193323Sed#endif 1853198090Srdivacky } 1854193323Sed 1855193323Sed#ifndef NDEBUG 1856198090Srdivacky Indent -= 2; 1857193323Sed#endif 1858193323Sed 1859198090Srdivacky return NULL; 1860198090Srdivacky } 1861193323Sed 1862198090Srdivacky case ISD::SDIVREM: 1863198090Srdivacky case ISD::UDIVREM: { 1864198090Srdivacky SDValue N0 = Node->getOperand(0); 1865198090Srdivacky SDValue N1 = Node->getOperand(1); 1866193323Sed 1867198090Srdivacky bool isSigned = Opcode == ISD::SDIVREM; 1868198090Srdivacky if (!isSigned) { 1869198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1870198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 1871198090Srdivacky case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 1872198090Srdivacky case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 1873198090Srdivacky case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 1874198090Srdivacky case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; 1875193323Sed } 1876198090Srdivacky } else { 1877198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1878198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 1879198090Srdivacky case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 1880198090Srdivacky case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 1881198090Srdivacky case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 1882198090Srdivacky case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 1883198090Srdivacky } 1884198090Srdivacky } 1885193323Sed 1886198090Srdivacky unsigned LoReg, HiReg; 1887198090Srdivacky unsigned ClrOpcode, SExtOpcode; 1888198090Srdivacky switch (NVT.getSimpleVT().SimpleTy) { 1889198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 1890198090Srdivacky case MVT::i8: 1891198090Srdivacky LoReg = X86::AL; HiReg = X86::AH; 1892198090Srdivacky ClrOpcode = 0; 1893198090Srdivacky SExtOpcode = X86::CBW; 1894198090Srdivacky break; 1895198090Srdivacky case MVT::i16: 1896198090Srdivacky LoReg = X86::AX; HiReg = X86::DX; 1897198090Srdivacky ClrOpcode = X86::MOV16r0; 1898198090Srdivacky SExtOpcode = X86::CWD; 1899198090Srdivacky break; 1900198090Srdivacky case MVT::i32: 1901198090Srdivacky LoReg = X86::EAX; HiReg = X86::EDX; 1902198090Srdivacky ClrOpcode = X86::MOV32r0; 1903198090Srdivacky SExtOpcode = X86::CDQ; 1904198090Srdivacky break; 1905198090Srdivacky case MVT::i64: 1906198090Srdivacky LoReg = X86::RAX; HiReg = X86::RDX; 1907198090Srdivacky ClrOpcode = ~0U; // NOT USED. 1908198090Srdivacky SExtOpcode = X86::CQO; 1909198090Srdivacky break; 1910198090Srdivacky } 1911193323Sed 1912198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1913198090Srdivacky bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1914198090Srdivacky bool signBitIsZero = CurDAG->SignBitIsZero(N0); 1915198090Srdivacky 1916198090Srdivacky SDValue InFlag; 1917198090Srdivacky if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { 1918198090Srdivacky // Special case for div8, just use a move with zero extension to AX to 1919198090Srdivacky // clear the upper 8 bits (AH). 1920198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; 1921198090Srdivacky if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { 1922198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; 1923198090Srdivacky Move = 1924198090Srdivacky SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, 1925198090Srdivacky MVT::Other, Ops, 1926198090Srdivacky array_lengthof(Ops)), 0); 1927198090Srdivacky Chain = Move.getValue(1); 1928198090Srdivacky ReplaceUses(N0.getValue(1), Chain); 1929193323Sed } else { 1930198090Srdivacky Move = 1931198090Srdivacky SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); 1932198090Srdivacky Chain = CurDAG->getEntryNode(); 1933198090Srdivacky } 1934198090Srdivacky Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); 1935198090Srdivacky InFlag = Chain.getValue(1); 1936198090Srdivacky } else { 1937198090Srdivacky InFlag = 1938198090Srdivacky CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, 1939198090Srdivacky LoReg, N0, SDValue()).getValue(1); 1940198090Srdivacky if (isSigned && !signBitIsZero) { 1941198090Srdivacky // Sign extend the low part into the high part. 1942193323Sed InFlag = 1943198090Srdivacky SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); 1944198090Srdivacky } else { 1945198090Srdivacky // Zero out the high part, effectively zero extending the input. 1946198090Srdivacky SDValue ClrNode; 1947198090Srdivacky 1948198090Srdivacky if (NVT.getSimpleVT() == MVT::i64) { 1949198090Srdivacky ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), 1950198090Srdivacky 0); 1951198090Srdivacky // We just did a 32-bit clear, insert it into a 64-bit register to 1952198090Srdivacky // clear the whole 64-bit reg. 1953198090Srdivacky SDValue Undef = 1954198090Srdivacky SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, 1955198090Srdivacky dl, MVT::i64), 0); 1956198090Srdivacky SDValue SubRegNo = 1957198090Srdivacky CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); 1958198090Srdivacky ClrNode = 1959198090Srdivacky SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, 1960198090Srdivacky MVT::i64, Undef, ClrNode, SubRegNo), 1961198090Srdivacky 0); 1962193323Sed } else { 1963198090Srdivacky ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); 1964193323Sed } 1965193323Sed 1966198090Srdivacky InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg, 1967198090Srdivacky ClrNode, InFlag).getValue(1); 1968193323Sed } 1969198090Srdivacky } 1970193323Sed 1971198090Srdivacky if (foldedLoad) { 1972198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1973198090Srdivacky InFlag }; 1974198090Srdivacky SDNode *CNode = 1975198090Srdivacky CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1976198090Srdivacky array_lengthof(Ops)); 1977198090Srdivacky InFlag = SDValue(CNode, 1); 1978198090Srdivacky // Update the chain. 1979198090Srdivacky ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1980198090Srdivacky } else { 1981198090Srdivacky InFlag = 1982198090Srdivacky SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1983198090Srdivacky } 1984198090Srdivacky 1985198090Srdivacky // Copy the division (low) result, if it is needed. 1986198090Srdivacky if (!N.getValue(0).use_empty()) { 1987198090Srdivacky SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1988198090Srdivacky LoReg, NVT, InFlag); 1989198090Srdivacky InFlag = Result.getValue(2); 1990198090Srdivacky ReplaceUses(N.getValue(0), Result); 1991193323Sed#ifndef NDEBUG 1992198090Srdivacky DEBUG({ 1993198090Srdivacky errs() << std::string(Indent-2, ' ') << "=> "; 1994198090Srdivacky Result.getNode()->dump(CurDAG); 1995198090Srdivacky errs() << '\n'; 1996198090Srdivacky }); 1997193323Sed#endif 1998198090Srdivacky } 1999198090Srdivacky // Copy the remainder (high) result, if it is needed. 2000198090Srdivacky if (!N.getValue(1).use_empty()) { 2001198090Srdivacky SDValue Result; 2002198090Srdivacky if (HiReg == X86::AH && Subtarget->is64Bit()) { 2003198090Srdivacky // Prevent use of AH in a REX instruction by referencing AX instead. 2004198090Srdivacky // Shift it down 8 bits. 2005198090Srdivacky Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2006198090Srdivacky X86::AX, MVT::i16, InFlag); 2007198090Srdivacky InFlag = Result.getValue(2); 2008198090Srdivacky Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 2009198090Srdivacky Result, 2010198090Srdivacky CurDAG->getTargetConstant(8, MVT::i8)), 2011198090Srdivacky 0); 2012198090Srdivacky // Then truncate it down to i8. 2013198090Srdivacky Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, 2014198090Srdivacky MVT::i8, Result); 2015198090Srdivacky } else { 2016198090Srdivacky Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2017198090Srdivacky HiReg, NVT, InFlag); 2018198090Srdivacky InFlag = Result.getValue(2); 2019193323Sed } 2020198090Srdivacky ReplaceUses(N.getValue(1), Result); 2021193323Sed#ifndef NDEBUG 2022198090Srdivacky DEBUG({ 2023198090Srdivacky errs() << std::string(Indent-2, ' ') << "=> "; 2024198090Srdivacky Result.getNode()->dump(CurDAG); 2025198090Srdivacky errs() << '\n'; 2026198090Srdivacky }); 2027193323Sed#endif 2028198090Srdivacky } 2029193323Sed 2030193323Sed#ifndef NDEBUG 2031198090Srdivacky Indent -= 2; 2032193323Sed#endif 2033193323Sed 2034198090Srdivacky return NULL; 2035198090Srdivacky } 2036193323Sed 2037198090Srdivacky case X86ISD::CMP: { 2038198090Srdivacky SDValue N0 = Node->getOperand(0); 2039198090Srdivacky SDValue N1 = Node->getOperand(1); 2040198090Srdivacky 2041198090Srdivacky // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to 2042198090Srdivacky // use a smaller encoding. 2043198090Srdivacky if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 2044198090Srdivacky N0.getValueType() != MVT::i8 && 2045198090Srdivacky X86::isZeroNode(N1)) { 2046198090Srdivacky ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1)); 2047198090Srdivacky if (!C) break; 2048198090Srdivacky 2049198090Srdivacky // For example, convert "testl %eax, $8" to "testb %al, $8" 2050198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && 2051198090Srdivacky (!(C->getZExtValue() & 0x80) || 2052198090Srdivacky HasNoSignedComparisonUses(Node))) { 2053198090Srdivacky SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); 2054198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2055198090Srdivacky 2056198090Srdivacky // On x86-32, only the ABCD registers have 8-bit subregisters. 2057198090Srdivacky if (!Subtarget->is64Bit()) { 2058198090Srdivacky TargetRegisterClass *TRC = 0; 2059198090Srdivacky switch (N0.getValueType().getSimpleVT().SimpleTy) { 2060198090Srdivacky case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2061198090Srdivacky case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2062198090Srdivacky default: llvm_unreachable("Unsupported TEST operand type!"); 2063198090Srdivacky } 2064198090Srdivacky SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 2065198090Srdivacky Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2066198090Srdivacky Reg.getValueType(), Reg, RC), 0); 2067198090Srdivacky } 2068198090Srdivacky 2069198090Srdivacky // Extract the l-register. 2070198090Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, 2071198090Srdivacky MVT::i8, Reg); 2072198090Srdivacky 2073198090Srdivacky // Emit a testb. 2074198090Srdivacky return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); 2075193323Sed } 2076198090Srdivacky 2077198090Srdivacky // For example, "testl %eax, $2048" to "testb %ah, $8". 2078198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && 2079198090Srdivacky (!(C->getZExtValue() & 0x8000) || 2080198090Srdivacky HasNoSignedComparisonUses(Node))) { 2081198090Srdivacky // Shift the immediate right by 8 bits. 2082198090Srdivacky SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, 2083198090Srdivacky MVT::i8); 2084198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2085198090Srdivacky 2086198090Srdivacky // Put the value in an ABCD register. 2087198090Srdivacky TargetRegisterClass *TRC = 0; 2088198090Srdivacky switch (N0.getValueType().getSimpleVT().SimpleTy) { 2089198090Srdivacky case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; 2090198090Srdivacky case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2091198090Srdivacky case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2092198090Srdivacky default: llvm_unreachable("Unsupported TEST operand type!"); 2093198090Srdivacky } 2094198090Srdivacky SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 2095198090Srdivacky Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2096198090Srdivacky Reg.getValueType(), Reg, RC), 0); 2097198090Srdivacky 2098198090Srdivacky // Extract the h-register. 2099198090Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, 2100198090Srdivacky MVT::i8, Reg); 2101198090Srdivacky 2102198090Srdivacky // Emit a testb. No special NOREX tricks are needed since there's 2103198090Srdivacky // only one GPR operand! 2104198090Srdivacky return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, 2105198090Srdivacky Subreg, ShiftedImm); 2106193323Sed } 2107198090Srdivacky 2108198090Srdivacky // For example, "testl %eax, $32776" to "testw %ax, $32776". 2109198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && 2110198090Srdivacky N0.getValueType() != MVT::i16 && 2111198090Srdivacky (!(C->getZExtValue() & 0x8000) || 2112198090Srdivacky HasNoSignedComparisonUses(Node))) { 2113198090Srdivacky SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); 2114198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2115198090Srdivacky 2116198090Srdivacky // Extract the 16-bit subregister. 2117198090Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, 2118198090Srdivacky MVT::i16, Reg); 2119198090Srdivacky 2120198090Srdivacky // Emit a testw. 2121198090Srdivacky return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); 2122193323Sed } 2123198090Srdivacky 2124198090Srdivacky // For example, "testq %rax, $268468232" to "testl %eax, $268468232". 2125198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && 2126198090Srdivacky N0.getValueType() == MVT::i64 && 2127198090Srdivacky (!(C->getZExtValue() & 0x80000000) || 2128198090Srdivacky HasNoSignedComparisonUses(Node))) { 2129198090Srdivacky SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); 2130198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2131198090Srdivacky 2132198090Srdivacky // Extract the 32-bit subregister. 2133198090Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl, 2134198090Srdivacky MVT::i32, Reg); 2135198090Srdivacky 2136198090Srdivacky // Emit a testl. 2137198090Srdivacky return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); 2138198090Srdivacky } 2139193323Sed } 2140198090Srdivacky break; 2141193323Sed } 2142198090Srdivacky } 2143193323Sed 2144193323Sed SDNode *ResNode = SelectCode(N); 2145193323Sed 2146193323Sed#ifndef NDEBUG 2147198090Srdivacky DEBUG({ 2148198090Srdivacky errs() << std::string(Indent-2, ' ') << "=> "; 2149198090Srdivacky if (ResNode == NULL || ResNode == N.getNode()) 2150198090Srdivacky N.getNode()->dump(CurDAG); 2151198090Srdivacky else 2152198090Srdivacky ResNode->dump(CurDAG); 2153198090Srdivacky errs() << '\n'; 2154198090Srdivacky }); 2155193323Sed Indent -= 2; 2156193323Sed#endif 2157193323Sed 2158193323Sed return ResNode; 2159193323Sed} 2160193323Sed 2161193323Sedbool X86DAGToDAGISel:: 2162193323SedSelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, 2163193323Sed std::vector<SDValue> &OutOps) { 2164193323Sed SDValue Op0, Op1, Op2, Op3, Op4; 2165193323Sed switch (ConstraintCode) { 2166193323Sed case 'o': // offsetable ?? 2167193323Sed case 'v': // not offsetable ?? 2168193323Sed default: return true; 2169193323Sed case 'm': // memory 2170193323Sed if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4)) 2171193323Sed return true; 2172193323Sed break; 2173193323Sed } 2174193323Sed 2175193323Sed OutOps.push_back(Op0); 2176193323Sed OutOps.push_back(Op1); 2177193323Sed OutOps.push_back(Op2); 2178193323Sed OutOps.push_back(Op3); 2179193323Sed OutOps.push_back(Op4); 2180193323Sed return false; 2181193323Sed} 2182193323Sed 2183193323Sed/// createX86ISelDag - This pass converts a legalized DAG into a 2184193323Sed/// X86-specific DAG, ready for instruction scheduling. 2185193323Sed/// 2186193323SedFunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, 2187193323Sed llvm::CodeGenOpt::Level OptLevel) { 2188193323Sed return new X86DAGToDAGISel(TM, OptLevel); 2189193323Sed} 2190