// X86ISelDAGToDAG.cpp (LLVM sources, revision 198090)
1193323Sed//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file defines a DAG pattern matching instruction selector for X86, 11193323Sed// converting from a legalized dag to a X86 dag. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#define DEBUG_TYPE "x86-isel" 16193323Sed#include "X86.h" 17193323Sed#include "X86InstrBuilder.h" 18193323Sed#include "X86ISelLowering.h" 19193323Sed#include "X86MachineFunctionInfo.h" 20193323Sed#include "X86RegisterInfo.h" 21193323Sed#include "X86Subtarget.h" 22193323Sed#include "X86TargetMachine.h" 23193323Sed#include "llvm/GlobalValue.h" 24193323Sed#include "llvm/Instructions.h" 25193323Sed#include "llvm/Intrinsics.h" 26193323Sed#include "llvm/Support/CFG.h" 27193323Sed#include "llvm/Type.h" 28193323Sed#include "llvm/CodeGen/MachineConstantPool.h" 29193323Sed#include "llvm/CodeGen/MachineFunction.h" 30193323Sed#include "llvm/CodeGen/MachineFrameInfo.h" 31193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 32193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 33193323Sed#include "llvm/CodeGen/SelectionDAGISel.h" 34193323Sed#include "llvm/Target/TargetMachine.h" 35193323Sed#include "llvm/Target/TargetOptions.h" 36193323Sed#include "llvm/Support/Compiler.h" 37193323Sed#include "llvm/Support/Debug.h" 38198090Srdivacky#include "llvm/Support/ErrorHandling.h" 39193323Sed#include "llvm/Support/MathExtras.h" 40198090Srdivacky#include "llvm/Support/raw_ostream.h" 41193323Sed#include "llvm/ADT/SmallPtrSet.h" 42193323Sed#include "llvm/ADT/Statistic.h" 43193323Sedusing namespace llvm; 
44193323Sed 45193323Sed#include "llvm/Support/CommandLine.h" 46193323Sedstatic cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); 47193323Sed 48193323SedSTATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 49193323Sed 50193323Sed//===----------------------------------------------------------------------===// 51193323Sed// Pattern Matcher Implementation 52193323Sed//===----------------------------------------------------------------------===// 53193323Sed 54193323Sednamespace { 55193323Sed /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 56193323Sed /// SDValue's instead of register numbers for the leaves of the matched 57193323Sed /// tree. 58193323Sed struct X86ISelAddressMode { 59193323Sed enum { 60193323Sed RegBase, 61193323Sed FrameIndexBase 62193323Sed } BaseType; 63193323Sed 64193323Sed struct { // This is really a union, discriminated by BaseType! 65193323Sed SDValue Reg; 66193323Sed int FrameIndex; 67193323Sed } Base; 68193323Sed 69193323Sed unsigned Scale; 70193323Sed SDValue IndexReg; 71193323Sed int32_t Disp; 72193323Sed SDValue Segment; 73193323Sed GlobalValue *GV; 74193323Sed Constant *CP; 75193323Sed const char *ES; 76193323Sed int JT; 77193323Sed unsigned Align; // CP alignment. 78195098Sed unsigned char SymbolFlags; // X86II::MO_* 79193323Sed 80193323Sed X86ISelAddressMode() 81195098Sed : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), 82198090Srdivacky Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), 83198090Srdivacky SymbolFlags(X86II::MO_NO_FLAG) { 84193323Sed } 85193323Sed 86193323Sed bool hasSymbolicDisplacement() const { 87193323Sed return GV != 0 || CP != 0 || ES != 0 || JT != -1; 88193323Sed } 89195098Sed 90195098Sed bool hasBaseOrIndexReg() const { 91195098Sed return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; 92195098Sed } 93195098Sed 94195098Sed /// isRIPRelative - Return true if this addressing mode is already RIP 95195098Sed /// relative. 
96195098Sed bool isRIPRelative() const { 97195098Sed if (BaseType != RegBase) return false; 98195098Sed if (RegisterSDNode *RegNode = 99195098Sed dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) 100195098Sed return RegNode->getReg() == X86::RIP; 101195098Sed return false; 102195098Sed } 103195098Sed 104195098Sed void setBaseReg(SDValue Reg) { 105195098Sed BaseType = RegBase; 106195098Sed Base.Reg = Reg; 107195098Sed } 108193323Sed 109193323Sed void dump() { 110198090Srdivacky errs() << "X86ISelAddressMode " << this << '\n'; 111198090Srdivacky errs() << "Base.Reg "; 112198090Srdivacky if (Base.Reg.getNode() != 0) 113198090Srdivacky Base.Reg.getNode()->dump(); 114198090Srdivacky else 115198090Srdivacky errs() << "nul"; 116198090Srdivacky errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' 117198090Srdivacky << " Scale" << Scale << '\n' 118198090Srdivacky << "IndexReg "; 119198090Srdivacky if (IndexReg.getNode() != 0) 120198090Srdivacky IndexReg.getNode()->dump(); 121198090Srdivacky else 122198090Srdivacky errs() << "nul"; 123198090Srdivacky errs() << " Disp " << Disp << '\n' 124198090Srdivacky << "GV "; 125198090Srdivacky if (GV) 126198090Srdivacky GV->dump(); 127198090Srdivacky else 128198090Srdivacky errs() << "nul"; 129198090Srdivacky errs() << " CP "; 130198090Srdivacky if (CP) 131198090Srdivacky CP->dump(); 132198090Srdivacky else 133198090Srdivacky errs() << "nul"; 134198090Srdivacky errs() << '\n' 135198090Srdivacky << "ES "; 136198090Srdivacky if (ES) 137198090Srdivacky errs() << ES; 138198090Srdivacky else 139198090Srdivacky errs() << "nul"; 140198090Srdivacky errs() << " JT" << JT << " Align" << Align << '\n'; 141193323Sed } 142193323Sed }; 143193323Sed} 144193323Sed 145193323Sednamespace { 146193323Sed //===--------------------------------------------------------------------===// 147193323Sed /// ISel - X86 specific code to select X86 machine instructions for 148193323Sed /// SelectionDAG operations. 
149193323Sed /// 150193323Sed class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel { 151193323Sed /// X86Lowering - This object fully describes how to lower LLVM code to an 152193323Sed /// X86-specific SelectionDAG. 153193323Sed X86TargetLowering &X86Lowering; 154193323Sed 155193323Sed /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 156193323Sed /// make the right decision when generating code for different targets. 157193323Sed const X86Subtarget *Subtarget; 158193323Sed 159193323Sed /// OptForSize - If true, selector should try to optimize for code size 160193323Sed /// instead of performance. 161193323Sed bool OptForSize; 162193323Sed 163193323Sed public: 164193323Sed explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) 165193323Sed : SelectionDAGISel(tm, OptLevel), 166193399Sed X86Lowering(*tm.getTargetLowering()), 167193399Sed Subtarget(&tm.getSubtarget<X86Subtarget>()), 168193323Sed OptForSize(false) {} 169193323Sed 170193323Sed virtual const char *getPassName() const { 171193323Sed return "X86 DAG->DAG Instruction Selection"; 172193323Sed } 173193323Sed 174193323Sed /// InstructionSelect - This callback is invoked by 175193323Sed /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 176193323Sed virtual void InstructionSelect(); 177193323Sed 178193323Sed virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); 179193323Sed 180193323Sed virtual 181193323Sed bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; 182193323Sed 183193323Sed// Include the pieces autogenerated from the target description. 
184193323Sed#include "X86GenDAGISel.inc" 185193323Sed 186193323Sed private: 187193323Sed SDNode *Select(SDValue N); 188193323Sed SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); 189198090Srdivacky SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); 190193323Sed 191193323Sed bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); 192193323Sed bool MatchLoad(SDValue N, X86ISelAddressMode &AM); 193193323Sed bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); 194198090Srdivacky bool MatchAddress(SDValue N, X86ISelAddressMode &AM); 195198090Srdivacky bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 196198090Srdivacky unsigned Depth); 197193323Sed bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); 198193323Sed bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, 199193323Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 200193323Sed SDValue &Segment); 201193323Sed bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, 202193323Sed SDValue &Scale, SDValue &Index, SDValue &Disp); 203194612Sed bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 204194612Sed SDValue &Scale, SDValue &Index, SDValue &Disp); 205193323Sed bool SelectScalarSSELoad(SDValue Op, SDValue Pred, 206193323Sed SDValue N, SDValue &Base, SDValue &Scale, 207193323Sed SDValue &Index, SDValue &Disp, 208193323Sed SDValue &Segment, 209193323Sed SDValue &InChain, SDValue &OutChain); 210193323Sed bool TryFoldLoad(SDValue P, SDValue N, 211193323Sed SDValue &Base, SDValue &Scale, 212193323Sed SDValue &Index, SDValue &Disp, 213193323Sed SDValue &Segment); 214193323Sed void PreprocessForRMW(); 215193323Sed void PreprocessForFPConvert(); 216193323Sed 217193323Sed /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 218193323Sed /// inline asm expressions. 
219193323Sed virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, 220193323Sed char ConstraintCode, 221193323Sed std::vector<SDValue> &OutOps); 222193323Sed 223193323Sed void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 224193323Sed 225193323Sed inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 226193323Sed SDValue &Scale, SDValue &Index, 227193323Sed SDValue &Disp, SDValue &Segment) { 228193323Sed Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 229193323Sed CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : 230193323Sed AM.Base.Reg; 231193323Sed Scale = getI8Imm(AM.Scale); 232193323Sed Index = AM.IndexReg; 233193323Sed // These are 32-bit even in 64-bit mode since RIP relative offset 234193323Sed // is 32-bit. 235193323Sed if (AM.GV) 236195098Sed Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, 237195098Sed AM.SymbolFlags); 238193323Sed else if (AM.CP) 239193323Sed Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 240195098Sed AM.Align, AM.Disp, AM.SymbolFlags); 241193323Sed else if (AM.ES) 242195098Sed Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); 243193323Sed else if (AM.JT != -1) 244195098Sed Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); 245193323Sed else 246193323Sed Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); 247193323Sed 248193323Sed if (AM.Segment.getNode()) 249193323Sed Segment = AM.Segment; 250193323Sed else 251193323Sed Segment = CurDAG->getRegister(0, MVT::i32); 252193323Sed } 253193323Sed 254193323Sed /// getI8Imm - Return a target constant with the specified value, of type 255193323Sed /// i8. 256193323Sed inline SDValue getI8Imm(unsigned Imm) { 257193323Sed return CurDAG->getTargetConstant(Imm, MVT::i8); 258193323Sed } 259193323Sed 260193323Sed /// getI16Imm - Return a target constant with the specified value, of type 261193323Sed /// i16. 
262193323Sed inline SDValue getI16Imm(unsigned Imm) { 263193323Sed return CurDAG->getTargetConstant(Imm, MVT::i16); 264193323Sed } 265193323Sed 266193323Sed /// getI32Imm - Return a target constant with the specified value, of type 267193323Sed /// i32. 268193323Sed inline SDValue getI32Imm(unsigned Imm) { 269193323Sed return CurDAG->getTargetConstant(Imm, MVT::i32); 270193323Sed } 271193323Sed 272193323Sed /// getGlobalBaseReg - Return an SDNode that returns the value of 273193323Sed /// the global base register. Output instructions required to 274193323Sed /// initialize the global base register, if necessary. 275193323Sed /// 276193323Sed SDNode *getGlobalBaseReg(); 277193323Sed 278193399Sed /// getTargetMachine - Return a reference to the TargetMachine, casted 279193399Sed /// to the target-specific type. 280193399Sed const X86TargetMachine &getTargetMachine() { 281193399Sed return static_cast<const X86TargetMachine &>(TM); 282193399Sed } 283193399Sed 284193399Sed /// getInstrInfo - Return a reference to the TargetInstrInfo, casted 285193399Sed /// to the target-specific type. 286193399Sed const X86InstrInfo *getInstrInfo() { 287193399Sed return getTargetMachine().getInstrInfo(); 288193399Sed } 289193399Sed 290193323Sed#ifndef NDEBUG 291193323Sed unsigned Indent; 292193323Sed#endif 293193323Sed }; 294193323Sed} 295193323Sed 296193323Sed 297193323Sedbool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, 298193323Sed SDNode *Root) const { 299193323Sed if (OptLevel == CodeGenOpt::None) return false; 300193323Sed 301193323Sed if (U == Root) 302193323Sed switch (U->getOpcode()) { 303193323Sed default: break; 304193323Sed case ISD::ADD: 305193323Sed case ISD::ADDC: 306193323Sed case ISD::ADDE: 307193323Sed case ISD::AND: 308193323Sed case ISD::OR: 309193323Sed case ISD::XOR: { 310193323Sed SDValue Op1 = U->getOperand(1); 311193323Sed 312193323Sed // If the other operand is a 8-bit immediate we should fold the immediate 313193323Sed // instead. 
This reduces code size. 314193323Sed // e.g. 315193323Sed // movl 4(%esp), %eax 316193323Sed // addl $4, %eax 317193323Sed // vs. 318193323Sed // movl $4, %eax 319193323Sed // addl 4(%esp), %eax 320193323Sed // The former is 2 bytes shorter. In case where the increment is 1, then 321193323Sed // the saving can be 4 bytes (by using incl %eax). 322193323Sed if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) 323193323Sed if (Imm->getAPIntValue().isSignedIntN(8)) 324193323Sed return false; 325193323Sed 326193323Sed // If the other operand is a TLS address, we should fold it instead. 327193323Sed // This produces 328193323Sed // movl %gs:0, %eax 329193323Sed // leal i@NTPOFF(%eax), %eax 330193323Sed // instead of 331193323Sed // movl $i@NTPOFF, %eax 332193323Sed // addl %gs:0, %eax 333193323Sed // if the block also has an access to a second TLS address this will save 334193323Sed // a load. 335193323Sed // FIXME: This is probably also true for non TLS addresses. 336193323Sed if (Op1.getOpcode() == X86ISD::Wrapper) { 337193323Sed SDValue Val = Op1.getOperand(0); 338193323Sed if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) 339193323Sed return false; 340193323Sed } 341193323Sed } 342193323Sed } 343193323Sed 344193323Sed // Proceed to 'generic' cycle finder code 345193323Sed return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); 346193323Sed} 347193323Sed 348193323Sed/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 349193323Sed/// and move load below the TokenFactor. Replace store's chain operand with 350193323Sed/// load's chain result. 
351193323Sedstatic void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, 352193323Sed SDValue Store, SDValue TF) { 353193323Sed SmallVector<SDValue, 4> Ops; 354193323Sed for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) 355193323Sed if (Load.getNode() == TF.getOperand(i).getNode()) 356193323Sed Ops.push_back(Load.getOperand(0)); 357193323Sed else 358193323Sed Ops.push_back(TF.getOperand(i)); 359198090Srdivacky SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); 360198090Srdivacky SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, 361198090Srdivacky Load.getOperand(1), 362198090Srdivacky Load.getOperand(2)); 363198090Srdivacky CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), 364193323Sed Store.getOperand(2), Store.getOperand(3)); 365193323Sed} 366193323Sed 367198090Srdivacky/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The 368198090Srdivacky/// chain produced by the load must only be used by the store's chain operand, 369198090Srdivacky/// otherwise this may produce a cycle in the DAG. 
370193323Sed/// 371193323Sedstatic bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, 372193323Sed SDValue &Load) { 373193323Sed if (N.getOpcode() == ISD::BIT_CONVERT) 374193323Sed N = N.getOperand(0); 375193323Sed 376193323Sed LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 377193323Sed if (!LD || LD->isVolatile()) 378193323Sed return false; 379193323Sed if (LD->getAddressingMode() != ISD::UNINDEXED) 380193323Sed return false; 381193323Sed 382193323Sed ISD::LoadExtType ExtType = LD->getExtensionType(); 383193323Sed if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) 384193323Sed return false; 385193323Sed 386193323Sed if (N.hasOneUse() && 387198090Srdivacky LD->hasNUsesOfValue(1, 1) && 388193323Sed N.getOperand(1) == Address && 389198090Srdivacky LD->isOperandOf(Chain.getNode())) { 390193323Sed Load = N; 391193323Sed return true; 392193323Sed } 393193323Sed return false; 394193323Sed} 395193323Sed 396193323Sed/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain 397193323Sed/// operand and move load below the call's chain operand. 
398193323Sedstatic void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, 399193323Sed SDValue Call, SDValue CallSeqStart) { 400193323Sed SmallVector<SDValue, 8> Ops; 401193323Sed SDValue Chain = CallSeqStart.getOperand(0); 402193323Sed if (Chain.getNode() == Load.getNode()) 403193323Sed Ops.push_back(Load.getOperand(0)); 404193323Sed else { 405193323Sed assert(Chain.getOpcode() == ISD::TokenFactor && 406193323Sed "Unexpected CallSeqStart chain operand"); 407193323Sed for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) 408193323Sed if (Chain.getOperand(i).getNode() == Load.getNode()) 409193323Sed Ops.push_back(Load.getOperand(0)); 410193323Sed else 411193323Sed Ops.push_back(Chain.getOperand(i)); 412193323Sed SDValue NewChain = 413193323Sed CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(), 414193323Sed MVT::Other, &Ops[0], Ops.size()); 415193323Sed Ops.clear(); 416193323Sed Ops.push_back(NewChain); 417193323Sed } 418193323Sed for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i) 419193323Sed Ops.push_back(CallSeqStart.getOperand(i)); 420193323Sed CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size()); 421193323Sed CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), 422193323Sed Load.getOperand(1), Load.getOperand(2)); 423193323Sed Ops.clear(); 424193323Sed Ops.push_back(SDValue(Load.getNode(), 1)); 425193323Sed for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) 426193323Sed Ops.push_back(Call.getOperand(i)); 427193323Sed CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); 428193323Sed} 429193323Sed 430193323Sed/// isCalleeLoad - Return true if call address is a load and it can be 431193323Sed/// moved below CALLSEQ_START and the chains leading up to the call. 432193323Sed/// Return the CALLSEQ_START by reference as a second output. 
433193323Sedstatic bool isCalleeLoad(SDValue Callee, SDValue &Chain) { 434193323Sed if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 435193323Sed return false; 436193323Sed LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 437193323Sed if (!LD || 438193323Sed LD->isVolatile() || 439193323Sed LD->getAddressingMode() != ISD::UNINDEXED || 440193323Sed LD->getExtensionType() != ISD::NON_EXTLOAD) 441193323Sed return false; 442193323Sed 443193323Sed // Now let's find the callseq_start. 444193323Sed while (Chain.getOpcode() != ISD::CALLSEQ_START) { 445193323Sed if (!Chain.hasOneUse()) 446193323Sed return false; 447193323Sed Chain = Chain.getOperand(0); 448193323Sed } 449193323Sed 450193323Sed if (Chain.getOperand(0).getNode() == Callee.getNode()) 451193323Sed return true; 452193323Sed if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && 453198090Srdivacky Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && 454198090Srdivacky Callee.getValue(1).hasOneUse()) 455193323Sed return true; 456193323Sed return false; 457193323Sed} 458193323Sed 459193323Sed 460193323Sed/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. 461193323Sed/// This is only run if not in -O0 mode. 462193323Sed/// This allows the instruction selector to pick more read-modify-write 463193323Sed/// instructions. This is a common case: 464193323Sed/// 465193323Sed/// [Load chain] 466193323Sed/// ^ 467193323Sed/// | 468193323Sed/// [Load] 469193323Sed/// ^ ^ 470193323Sed/// | | 471193323Sed/// / \- 472193323Sed/// / | 473193323Sed/// [TokenFactor] [Op] 474193323Sed/// ^ ^ 475193323Sed/// | | 476193323Sed/// \ / 477193323Sed/// \ / 478193323Sed/// [Store] 479193323Sed/// 480193323Sed/// The fact the store's chain operand != load's chain will prevent the 481193323Sed/// (store (op (load))) instruction from being selected. 
We can transform it to: 482193323Sed/// 483193323Sed/// [Load chain] 484193323Sed/// ^ 485193323Sed/// | 486193323Sed/// [TokenFactor] 487193323Sed/// ^ 488193323Sed/// | 489193323Sed/// [Load] 490193323Sed/// ^ ^ 491193323Sed/// | | 492193323Sed/// | \- 493193323Sed/// | | 494193323Sed/// | [Op] 495193323Sed/// | ^ 496193323Sed/// | | 497193323Sed/// \ / 498193323Sed/// \ / 499193323Sed/// [Store] 500193323Sedvoid X86DAGToDAGISel::PreprocessForRMW() { 501193323Sed for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 502193323Sed E = CurDAG->allnodes_end(); I != E; ++I) { 503193323Sed if (I->getOpcode() == X86ISD::CALL) { 504193323Sed /// Also try moving call address load from outside callseq_start to just 505193323Sed /// before the call to allow it to be folded. 506193323Sed /// 507193323Sed /// [Load chain] 508193323Sed /// ^ 509193323Sed /// | 510193323Sed /// [Load] 511193323Sed /// ^ ^ 512193323Sed /// | | 513193323Sed /// / \-- 514193323Sed /// / | 515193323Sed ///[CALLSEQ_START] | 516193323Sed /// ^ | 517193323Sed /// | | 518193323Sed /// [LOAD/C2Reg] | 519193323Sed /// | | 520193323Sed /// \ / 521193323Sed /// \ / 522193323Sed /// [CALL] 523193323Sed SDValue Chain = I->getOperand(0); 524193323Sed SDValue Load = I->getOperand(1); 525193323Sed if (!isCalleeLoad(Load, Chain)) 526193323Sed continue; 527193323Sed MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); 528193323Sed ++NumLoadMoved; 529193323Sed continue; 530193323Sed } 531193323Sed 532193323Sed if (!ISD::isNON_TRUNCStore(I)) 533193323Sed continue; 534193323Sed SDValue Chain = I->getOperand(0); 535193323Sed 536193323Sed if (Chain.getNode()->getOpcode() != ISD::TokenFactor) 537193323Sed continue; 538193323Sed 539193323Sed SDValue N1 = I->getOperand(1); 540193323Sed SDValue N2 = I->getOperand(2); 541193323Sed if ((N1.getValueType().isFloatingPoint() && 542193323Sed !N1.getValueType().isVector()) || 543193323Sed !N1.hasOneUse()) 544193323Sed continue; 545193323Sed 546193323Sed bool 
RModW = false; 547193323Sed SDValue Load; 548193323Sed unsigned Opcode = N1.getNode()->getOpcode(); 549193323Sed switch (Opcode) { 550193323Sed case ISD::ADD: 551193323Sed case ISD::MUL: 552193323Sed case ISD::AND: 553193323Sed case ISD::OR: 554193323Sed case ISD::XOR: 555193323Sed case ISD::ADDC: 556193323Sed case ISD::ADDE: 557193323Sed case ISD::VECTOR_SHUFFLE: { 558193323Sed SDValue N10 = N1.getOperand(0); 559193323Sed SDValue N11 = N1.getOperand(1); 560193323Sed RModW = isRMWLoad(N10, Chain, N2, Load); 561193323Sed if (!RModW) 562193323Sed RModW = isRMWLoad(N11, Chain, N2, Load); 563193323Sed break; 564193323Sed } 565193323Sed case ISD::SUB: 566193323Sed case ISD::SHL: 567193323Sed case ISD::SRA: 568193323Sed case ISD::SRL: 569193323Sed case ISD::ROTL: 570193323Sed case ISD::ROTR: 571193323Sed case ISD::SUBC: 572193323Sed case ISD::SUBE: 573193323Sed case X86ISD::SHLD: 574193323Sed case X86ISD::SHRD: { 575193323Sed SDValue N10 = N1.getOperand(0); 576193323Sed RModW = isRMWLoad(N10, Chain, N2, Load); 577193323Sed break; 578193323Sed } 579193323Sed } 580193323Sed 581193323Sed if (RModW) { 582193323Sed MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); 583193323Sed ++NumLoadMoved; 584193323Sed } 585193323Sed } 586193323Sed} 587193323Sed 588193323Sed 589193323Sed/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend 590193323Sed/// nodes that target the FP stack to be store and load to the stack. This is a 591193323Sed/// gross hack. We would like to simply mark these as being illegal, but when 592193323Sed/// we do that, legalize produces these when it expands calls, then expands 593193323Sed/// these in the same legalize pass. We would like dag combine to be able to 594193323Sed/// hack on these between the call expansion and the node legalization. As such 595193323Sed/// this pass basically does "really late" legalization of these inline with the 596193323Sed/// X86 isel pass. 
597193323Sedvoid X86DAGToDAGISel::PreprocessForFPConvert() { 598193323Sed for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 599193323Sed E = CurDAG->allnodes_end(); I != E; ) { 600193323Sed SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 601193323Sed if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) 602193323Sed continue; 603193323Sed 604193323Sed // If the source and destination are SSE registers, then this is a legal 605193323Sed // conversion that should not be lowered. 606198090Srdivacky EVT SrcVT = N->getOperand(0).getValueType(); 607198090Srdivacky EVT DstVT = N->getValueType(0); 608193323Sed bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); 609193323Sed bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); 610193323Sed if (SrcIsSSE && DstIsSSE) 611193323Sed continue; 612193323Sed 613193323Sed if (!SrcIsSSE && !DstIsSSE) { 614193323Sed // If this is an FPStack extension, it is a noop. 615193323Sed if (N->getOpcode() == ISD::FP_EXTEND) 616193323Sed continue; 617193323Sed // If this is a value-preserving FPStack truncation, it is a noop. 618193323Sed if (N->getConstantOperandVal(1)) 619193323Sed continue; 620193323Sed } 621193323Sed 622193323Sed // Here we could have an FP stack truncation or an FPStack <-> SSE convert. 623193323Sed // FPStack has extload and truncstore. SSE can fold direct loads into other 624193323Sed // operations. Based on this, decide what we want to do. 625198090Srdivacky EVT MemVT; 626193323Sed if (N->getOpcode() == ISD::FP_ROUND) 627193323Sed MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 628193323Sed else 629193323Sed MemVT = SrcIsSSE ? SrcVT : DstVT; 630193323Sed 631193323Sed SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); 632193323Sed DebugLoc dl = N->getDebugLoc(); 633193323Sed 634193323Sed // FIXME: optimize the case where the src/dest is a load or store? 
635193323Sed SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, 636193323Sed N->getOperand(0), 637193323Sed MemTmp, NULL, 0, MemVT); 638193323Sed SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, 639193323Sed NULL, 0, MemVT); 640193323Sed 641193323Sed // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 642193323Sed // extload we created. This will cause general havok on the dag because 643193323Sed // anything below the conversion could be folded into other existing nodes. 644193323Sed // To avoid invalidating 'I', back it up to the convert node. 645193323Sed --I; 646193323Sed CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 647193323Sed 648193323Sed // Now that we did that, the node is dead. Increment the iterator to the 649193323Sed // next node to process, then delete N. 650193323Sed ++I; 651193323Sed CurDAG->DeleteNode(N); 652193323Sed } 653193323Sed} 654193323Sed 655193323Sed/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 656193323Sed/// when it has created a SelectionDAG for us to codegen. 657193323Sedvoid X86DAGToDAGISel::InstructionSelect() { 658198090Srdivacky const Function *F = MF->getFunction(); 659193323Sed OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); 660193323Sed 661193323Sed DEBUG(BB->dump()); 662193323Sed if (OptLevel != CodeGenOpt::None) 663193323Sed PreprocessForRMW(); 664193323Sed 665193323Sed // FIXME: This should only happen when not compiled with -O0. 666193323Sed PreprocessForFPConvert(); 667193323Sed 668193323Sed // Codegen the basic block. 
669193323Sed#ifndef NDEBUG 670198090Srdivacky DEBUG(errs() << "===== Instruction selection begins:\n"); 671193323Sed Indent = 0; 672193323Sed#endif 673193323Sed SelectRoot(*CurDAG); 674193323Sed#ifndef NDEBUG 675198090Srdivacky DEBUG(errs() << "===== Instruction selection ends:\n"); 676193323Sed#endif 677193323Sed 678193323Sed CurDAG->RemoveDeadNodes(); 679193323Sed} 680193323Sed 681193323Sed/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 682193323Sed/// the main function. 683193323Sedvoid X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 684193323Sed MachineFrameInfo *MFI) { 685193323Sed const TargetInstrInfo *TII = TM.getInstrInfo(); 686193323Sed if (Subtarget->isTargetCygMing()) 687193323Sed BuildMI(BB, DebugLoc::getUnknownLoc(), 688193323Sed TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 689193323Sed} 690193323Sed 691193323Sedvoid X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 692193323Sed // If this is main, emit special code for main. 693193323Sed MachineBasicBlock *BB = MF.begin(); 694193323Sed if (Fn.hasExternalLinkage() && Fn.getName() == "main") 695193323Sed EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 696193323Sed} 697193323Sed 698193323Sed 699193323Sedbool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, 700193323Sed X86ISelAddressMode &AM) { 701193323Sed assert(N.getOpcode() == X86ISD::SegmentBaseAddress); 702193323Sed SDValue Segment = N.getOperand(0); 703193323Sed 704193323Sed if (AM.Segment.getNode() == 0) { 705193323Sed AM.Segment = Segment; 706193323Sed return false; 707193323Sed } 708193323Sed 709193323Sed return true; 710193323Sed} 711193323Sed 712193323Sedbool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { 713193323Sed // This optimization is valid because the GNU TLS model defines that 714193323Sed // gs:0 (or fs:0 on X86-64) contains its own address. 
  // For more information see http://people.redhat.com/drepper/tls.pdf

  SDValue Address = N.getOperand(1);
  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
      !MatchSegmentBaseAddress (Address, AM))
    return false;

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
///
/// On success, exactly one of AM.GV / AM.CP / AM.ES / AM.JT is populated
/// (plus AM.Disp / AM.SymbolFlags), and for the RIP-relative path the base
/// register is forced to %rip.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel) &&
      // Base and index reg must be 0 in order to use %rip as base and lowering
      // must allow RIP.
      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      // Offset already includes any displacement accumulated so far, so it
      // is assigned (not added) to AM.Disp below.
      int64_t Offset = AM.Disp + G->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.GV = G->getGlobal();
      AM.Disp = Offset;
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      int64_t Offset = AM.Disp + CP->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp = Offset;
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else {
      // Only remaining wrapped operand kind; cast<> asserts if that ever
      // changes.
      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    }

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
  // mode, this results in a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      ((M == CodeModel::Small || M == CodeModel::Kernel) &&
       TM.getRelocationModel() == Reloc::Static)) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else {
      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    }
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
813198090Srdivacky if (AM.Scale == 2 && 814198090Srdivacky AM.BaseType == X86ISelAddressMode::RegBase && 815198090Srdivacky AM.Base.Reg.getNode() == 0) { 816198090Srdivacky AM.Base.Reg = AM.IndexReg; 817198090Srdivacky AM.Scale = 1; 818198090Srdivacky } 819198090Srdivacky 820198090Srdivacky // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, 821198090Srdivacky // because it has a smaller encoding. 822198090Srdivacky // TODO: Which other code models can use this? 823198090Srdivacky if (TM.getCodeModel() == CodeModel::Small && 824198090Srdivacky Subtarget->is64Bit() && 825198090Srdivacky AM.Scale == 1 && 826198090Srdivacky AM.BaseType == X86ISelAddressMode::RegBase && 827198090Srdivacky AM.Base.Reg.getNode() == 0 && 828198090Srdivacky AM.IndexReg.getNode() == 0 && 829198090Srdivacky AM.SymbolFlags == X86II::MO_NO_FLAG && 830198090Srdivacky AM.hasSymbolicDisplacement()) 831198090Srdivacky AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); 832198090Srdivacky 833198090Srdivacky return false; 834198090Srdivacky} 835198090Srdivacky 836198090Srdivackybool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 837198090Srdivacky unsigned Depth) { 838193323Sed bool is64Bit = Subtarget->is64Bit(); 839193323Sed DebugLoc dl = N.getDebugLoc(); 840198090Srdivacky DEBUG({ 841198090Srdivacky errs() << "MatchAddress: "; 842198090Srdivacky AM.dump(); 843198090Srdivacky }); 844193323Sed // Limit recursion. 845193323Sed if (Depth > 5) 846193323Sed return MatchAddressBase(N, AM); 847198090Srdivacky 848198090Srdivacky CodeModel::Model M = TM.getCodeModel(); 849198090Srdivacky 850195098Sed // If this is already a %rip relative address, we can only merge immediates 851195098Sed // into it. Instead of handling this in every case, we handle it here. 852193323Sed // RIP relative addressing: %rip + 32-bit displacement! 
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    if (!AM.ES && AM.JT != -1) return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
      int64_t Val = AM.Disp + Cst->getSExtValue();
      if (X86::isOffsetSuitableForCodeModel(Val, M,
                                            AM.hasSymbolicDisplacement())) {
        AM.Disp = Val;
        return false;
      }
    }
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    // On 32-bit x86 any 32-bit constant fits the disp field; on 64-bit the
    // combined displacement must still be encodable for the code model.
    if (!is64Bit ||
        X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
                                          AM.hasSymbolicDisplacement())) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::SegmentBaseAddress:
    if (!MatchSegmentBaseAddress(N, AM))
      return false;
    break;

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoad(N, AM))
      return false;
    break;

  case ISD::FrameIndex:
    // A frame index can serve as the base, but only if the base slot is
    // still free.
    if (AM.BaseType == X86ISelAddressMode::RegBase
        && AM.Base.Reg.getNode() == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
          if (!is64Bit ||
              X86::isOffsetSuitableForCodeModel(Disp, M,
                                                AM.hasSymbolicDisplacement()))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
      break;
    }
    // NOTE(review): when the shift amount is NOT a constant, control falls
    // through into the SMUL_LOHI/MUL cases below (the break above is inside
    // the if).  The fallthrough is harmless today -- the MUL code re-checks
    // operand 1 for a constant -- but confirm this is intentional.

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.getNode() == 0 &&
        AM.IndexReg.getNode() == 0) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
                                      CN->getZExtValue();
            if (!is64Bit ||
                X86::isOffsetSuitableForCodeModel(Disp, M,
                                                  AM.hasSymbolicDisplacement()))
              AM.Disp = Disp;
            else
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          // Same value in base and index yields base + index*(scale) with
          // AM.Scale already set to CN-1, i.e. X*[3,5,9].
          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if a has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }
    // Cost heuristic: negative means the negate trick is expected to win.
    int Cost = 0;
    SDValue RHS = N.getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base.Reg.getNode() &&
         !AM.Base.Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.  (Each bool-to-int term counts one component the
    // fold of the LHS newly contributed.)
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    if (Zero.getNode()->getNodeId() == -1 ||
        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    if (Neg.getNode()->getNodeId() == -1 ||
        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    return false;
  }

  case ISD::ADD: {
    // Try folding both operands, in both orders; the first order that folds
    // both wins.  AM is restored from Backup between attempts.
    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;
    if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
        !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base.Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      AM.Base.Reg = N.getNode()->getOperand(0);
      AM.IndexReg = N.getNode()->getOperand(1);
      AM.Scale = 1;
      return false;
    }
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      uint64_t Offset = CN->getSExtValue();
      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          (!is64Bit ||
           X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
                                             AM.hasSymbolicDisplacement())) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp += Offset;
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    SDValue Shift = N.getOperand(0);
    if (Shift.getNumOperands() != 2) break;

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    SDValue X = Shift.getOperand(0);
    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
    // allows us to convert the shift and and into an h-register extract and
    // a scaled index.
    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
      unsigned ScaleLog = 8 - C1->getZExtValue();
      if (ScaleLog > 0 && ScaleLog < 4 &&
          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                      X, Eight);
        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
                                      Srl, Mask);
        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                      And, ShlCount);

        // Insert the new nodes into the topological ordering.  Each new node
        // must not be ordered after the node it will feed.
        if (Eight.getNode()->getNodeId() == -1 ||
            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Mask.getNode()->getNodeId() == -1 ||
            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Srl.getNode()->getNodeId() == -1 ||
            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
        }
        if (And.getNode()->getNodeId() == -1 ||
            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), And.getNode());
          And.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (ShlCount.getNode()->getNodeId() == -1 ||
            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
          // NOTE(review): the guard above compares against X's node id, but
          // the assignment below uses N's id -- inconsistent with every other
          // reposition block here (which assign the id of the node they
          // compared against).  Looks like a copy-paste slip; confirm against
          // upstream before changing.
          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (Shl.getNode()->getNodeId() == -1 ||
            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        CurDAG->ReplaceAllUsesWith(N, Shl);
        AM.IndexReg = And;
        AM.Scale = (1 << ScaleLog);
        return false;
      }
    }

    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (Shift.getOpcode() != ISD::SHL) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getZExtValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                         SDValue(C2, 0), SDValue(C1, 0));
    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
                                     NewANDMask);
    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                       NewAND, SDValue(C1, 0));

    // Insert the new nodes into the topological ordering.
    if (C1->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), C1);
      C1->setNodeId(X.getNode()->getNodeId());
    }
    if (NewANDMask.getNode()->getNodeId() == -1 ||
        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
    }
    if (NewAND.getNode()->getNodeId() == -1 ||
        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
    }
    if (NewSHIFT.getNode()->getNodeId() == -1 ||
        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
    }

    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  // No special pattern matched; fall back to using N itself as base/index.
  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress.  Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.getNode() == 0) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Note the return convention is the opposite of MatchAddress: true here
/// means success (the five operand outputs are filled in).
bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;
  bool Done = false;
  if (AvoidDupAddrCompute && !N.hasOneUse()) {
    unsigned Opcode = N.getOpcode();
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
      // If we are able to fold N into addressing mode, then we'll allow it even
      // if N has multiple uses. In general, addressing computation is used as
      // addresses by all of its uses. But watch out for CopyToReg uses, that
      // means the address computation is liveout. It will be computed by a LEA
      // so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = N.getNode()->use_begin(),
             UE = N.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          // Use N as a plain base register instead of folding its
          // computation into the address.
          MatchAddressBase(N, AM);
          Done = true;
          break;
        }
      }
    }
  }

  // MatchAddress returns true on failure.
  if (!Done && MatchAddress(N, AM))
    return false;

  // Fill unused base/index slots with the "no register" placeholder so
  // getAddressOperands always has five well-formed operands.
  EVT VT = N.getValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.getNode())
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &InChain,
                                          SDValue &OutChain) {
  // Pattern 1: scalar_to_vector of a plain (non-extending) load whose only
  // use is this node -- the upper elements are undef.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse()) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    OutChain = LD->getChain();
    InChain = SDValue(LD, 1);
    return true;
  }
  return false;
}


/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  assert (T == AM.Segment);
  AM.Segment = Copy;

  // Complexity scores how much of the addressing mode is populated; the
  // match is only emitted as an LEA if the score clears a threshold below.
  EVT VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.getNode())
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
1377193323Sed if (AM.Scale > 1) 1378193323Sed Complexity++; 1379193323Sed 1380193323Sed // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1381193323Sed // to a LEA. This is determined with some expermentation but is by no means 1382193323Sed // optimal (especially for code size consideration). LEA is nice because of 1383193323Sed // its three-address nature. Tweak the cost function again when we can run 1384193323Sed // convertToThreeAddress() at register allocation time. 1385193323Sed if (AM.hasSymbolicDisplacement()) { 1386193323Sed // For X86-64, we should always use lea to materialize RIP relative 1387193323Sed // addresses. 1388193323Sed if (Subtarget->is64Bit()) 1389193323Sed Complexity = 4; 1390193323Sed else 1391193323Sed Complexity += 2; 1392193323Sed } 1393193323Sed 1394193323Sed if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) 1395193323Sed Complexity++; 1396193323Sed 1397198090Srdivacky // If it isn't worth using an LEA, reject it. 1398198090Srdivacky if (Complexity <= 2) 1399198090Srdivacky return false; 1400198090Srdivacky 1401198090Srdivacky SDValue Segment; 1402198090Srdivacky getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1403198090Srdivacky return true; 1404193323Sed} 1405193323Sed 1406194612Sed/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 
1407194612Sedbool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 1408194612Sed SDValue &Scale, SDValue &Index, 1409194612Sed SDValue &Disp) { 1410194612Sed assert(Op.getOpcode() == X86ISD::TLSADDR); 1411194612Sed assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1412194612Sed const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1413194612Sed 1414194612Sed X86ISelAddressMode AM; 1415194612Sed AM.GV = GA->getGlobal(); 1416194612Sed AM.Disp += GA->getOffset(); 1417194612Sed AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); 1418195098Sed AM.SymbolFlags = GA->getTargetFlags(); 1419195098Sed 1420194612Sed if (N.getValueType() == MVT::i32) { 1421194612Sed AM.Scale = 1; 1422194612Sed AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1423194612Sed } else { 1424194612Sed AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1425194612Sed } 1426194612Sed 1427194612Sed SDValue Segment; 1428194612Sed getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1429194612Sed return true; 1430194612Sed} 1431194612Sed 1432194612Sed 1433193323Sedbool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, 1434193323Sed SDValue &Base, SDValue &Scale, 1435193323Sed SDValue &Index, SDValue &Disp, 1436193323Sed SDValue &Segment) { 1437193323Sed if (ISD::isNON_EXTLoad(N.getNode()) && 1438193323Sed N.hasOneUse() && 1439193323Sed IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) 1440193323Sed return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); 1441193323Sed return false; 1442193323Sed} 1443193323Sed 1444193323Sed/// getGlobalBaseReg - Return an SDNode that returns the value of 1445193323Sed/// the global base register. Output instructions required to 1446193323Sed/// initialize the global base register, if necessary. 
/// getGlobalBaseReg - Return an SDNode that references the register holding
/// the global base for the current function (obtained from the target's
/// X86InstrInfo), typed as a pointer.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}

/// FindCallStartFromCall - Walk up the token-chain operands from a call-related
/// node until the matching CALLSEQ_START node is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).getNode());
}

/// SelectAtomic64 - Select a 64-bit atomic pseudo-instruction (ATOM*6432)
/// for one of the X86ISD::ATOM*64_DAG nodes. Operands are the chain, the
/// address, and the low/high halves of the 64-bit value. Returns NULL if the
/// address does not match the x86 addressing mode.
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = Node->getOperand(2);
  SDValue In2H = Node->getOperand(3);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  // Transfer the memory operand from the original MemSDNode to the new
  // machine node so alias analysis / scheduling keep the memory reference.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
                                           MVT::i32, MVT::i32, MVT::Other, Ops,
                                           array_lengthof(Ops));
  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
  return ResNode;
}

/// SelectAtomicLoadAdd - Try to select an ISD::ATOMIC_LOAD_ADD whose result
/// is unused as a "lock add/sub/inc/dec" memory instruction. Returns 0 (no
/// match) when the result is used, the address doesn't fit the x86 addressing
/// mode, or the type is unsupported.
SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
  if (Node->hasAnyUseOfValue(0))
    return 0;

  // Optimize common patterns for __sync_add_and_fetch and
  // __sync_sub_and_fetch where the result is not used. This allows us
  // to use "lock" version of add, sub, inc, dec instructions.
  // FIXME: Do not use special instructions but instead add the "lock"
  // prefix to the target node somehow. The extra information will then be
  // transferred to machine instruction and it denotes the prefix.
  SDValue Chain = Node->getOperand(0);
  SDValue Ptr = Node->getOperand(1);
  SDValue Val = Node->getOperand(2);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return 0;

  // Classify the addend: +1 -> inc, -1 -> dec, negative constant -> sub of
  // the negated constant, (0 - x) -> sub of x; otherwise a plain add.
  bool isInc = false, isDec = false, isSub = false, isCN = false;
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
  if (CN) {
    isCN = true;
    int64_t CNVal = CN->getSExtValue();
    if (CNVal == 1)
      isInc = true;
    else if (CNVal == -1)
      isDec = true;
    else if (CNVal >= 0)
      Val = CurDAG->getTargetConstant(CNVal, NVT);
    else {
      isSub = true;
      Val = CurDAG->getTargetConstant(-CNVal, NVT);
    }
  } else if (Val.hasOneUse() &&
             Val.getOpcode() == ISD::SUB &&
             X86::isZeroNode(Val.getOperand(0))) {
    isSub = true;
    Val = Val.getOperand(1);
  }

  // Pick the locked opcode for the operation kind, operand kind (immediate
  // vs. register) and width. For 16/32/64 bits, prefer the sign-extended-imm8
  // encodings when the immediate fits.
  unsigned Opc = 0;
  switch (NVT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:
    if (isInc)
      Opc = X86::LOCK_INC8m;
    else if (isDec)
      Opc = X86::LOCK_DEC8m;
    else if (isSub) {
      if (isCN)
        Opc = X86::LOCK_SUB8mi;
      else
        Opc = X86::LOCK_SUB8mr;
    } else {
      if (isCN)
        Opc = X86::LOCK_ADD8mi;
      else
        Opc = X86::LOCK_ADD8mr;
    }
    break;
  case MVT::i16:
    if (isInc)
      Opc = X86::LOCK_INC16m;
    else if (isDec)
      Opc = X86::LOCK_DEC16m;
    else if (isSub) {
      if (isCN) {
        if (Predicate_i16immSExt8(Val.getNode()))
          Opc = X86::LOCK_SUB16mi8;
        else
          Opc = X86::LOCK_SUB16mi;
      } else
        Opc = X86::LOCK_SUB16mr;
    } else {
      if (isCN) {
        if (Predicate_i16immSExt8(Val.getNode()))
          Opc = X86::LOCK_ADD16mi8;
        else
          Opc = X86::LOCK_ADD16mi;
      } else
        Opc = X86::LOCK_ADD16mr;
    }
    break;
  case MVT::i32:
    if (isInc)
      Opc = X86::LOCK_INC32m;
    else if (isDec)
      Opc = X86::LOCK_DEC32m;
    else if (isSub) {
      if (isCN) {
        if (Predicate_i32immSExt8(Val.getNode()))
          Opc = X86::LOCK_SUB32mi8;
        else
          Opc = X86::LOCK_SUB32mi;
      } else
        Opc = X86::LOCK_SUB32mr;
    } else {
      if (isCN) {
        if (Predicate_i32immSExt8(Val.getNode()))
          Opc = X86::LOCK_ADD32mi8;
        else
          Opc = X86::LOCK_ADD32mi;
      } else
        Opc = X86::LOCK_ADD32mr;
    }
    break;
  case MVT::i64:
    if (isInc)
      Opc = X86::LOCK_INC64m;
    else if (isDec)
      Opc = X86::LOCK_DEC64m;
    else if (isSub) {
      Opc = X86::LOCK_SUB64mr;
      if (isCN) {
        if (Predicate_i64immSExt8(Val.getNode()))
          Opc = X86::LOCK_SUB64mi8;
        else if (Predicate_i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_SUB64mi32;
      }
    } else {
      Opc = X86::LOCK_ADD64mr;
      if (isCN) {
        if (Predicate_i64immSExt8(Val.getNode()))
          Opc = X86::LOCK_ADD64mi8;
        else if (Predicate_i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_ADD64mi32;
      }
    }
    break;
  }

  DebugLoc dl = Node->getDebugLoc();
  // The original node produces a (dead) value result; replace it with an
  // IMPLICIT_DEF so uses of value 0 (there are none — checked above) still
  // have something to reference after the merge below.
  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
                                                 dl, NVT), 0);
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  if (isInc || isDec) {
    // inc/dec take no value operand: 5 address operands + chain.
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  } else {
    // add/sub additionally take the value operand.
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  }
}

/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = N->use_begin(),
         UE = N->use_end(); UI != UE; ++UI) {
    // Only examine CopyToReg uses.
    if (UI->getOpcode() != ISD::CopyToReg)
      return false;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
        X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the opcode of the user.
      switch (FlagUI->getMachineOpcode()) {
      // These comparisons don't treat the most significant bit specially.
      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
      case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
      case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
      case X86::CMOVA16rr: case X86::CMOVA16rm:
      case X86::CMOVA32rr: case X86::CMOVA32rm:
      case X86::CMOVA64rr: case X86::CMOVA64rm:
      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
      case X86::CMOVB16rr: case X86::CMOVB16rm:
      case X86::CMOVB32rr: case X86::CMOVB32rm:
      case X86::CMOVB64rr: case X86::CMOVB64rm:
      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
      case X86::CMOVE16rr: case X86::CMOVE16rm:
      case X86::CMOVE32rr: case X86::CMOVE32rm:
      case X86::CMOVE64rr: case X86::CMOVE64rm:
      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
      case X86::CMOVP16rr: case X86::CMOVP16rm:
      case X86::CMOVP32rr: case X86::CMOVP32rm:
      case X86::CMOVP64rr: case X86::CMOVP64rm:
        continue;
      // Anything else: assume conservatively.
      default: return false;
      }
    }
  }
  return true;
}

/// Select - Main instruction-selection hook: select a machine node for N,
/// or return NULL to fall back to / finish with the table-generated
/// SelectCode. Handles the cases that need custom C++ logic (global base
/// register, 64-bit atomics, atomic load-add, MUL_LOHI, DIVREM, and CMP
/// narrowing) before deferring to SelectCode.
SDNode *X86DAGToDAGISel::Select(SDValue N) {
  SDNode *Node = N.getNode();
  EVT NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();
  DebugLoc dl = Node->getDebugLoc();

#ifndef NDEBUG
  DEBUG({
      errs() << std::string(Indent, ' ') << "Selecting: ";
      Node->dump(CurDAG);
      errs() << '\n';
    });
  Indent += 2;
#endif

  if (Node->isMachineOpcode()) {
#ifndef NDEBUG
    DEBUG({
        errs() << std::string(Indent-2, ' ') << "== ";
        Node->dump(CurDAG);
        errs() << '\n';
      });
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
  default: break;
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();

  case X86ISD::ATOMOR64_DAG:
    return SelectAtomic64(Node, X86::ATOMOR6432);
  case X86ISD::ATOMXOR64_DAG:
    return SelectAtomic64(Node, X86::ATOMXOR6432);
  case X86ISD::ATOMADD64_DAG:
    return SelectAtomic64(Node, X86::ATOMADD6432);
  case X86ISD::ATOMSUB64_DAG:
    return SelectAtomic64(Node, X86::ATOMSUB6432);
  case X86ISD::ATOMNAND64_DAG:
    return SelectAtomic64(Node, X86::ATOMNAND6432);
  case X86ISD::ATOMAND64_DAG:
    return SelectAtomic64(Node, X86::ATOMAND6432);
  case X86ISD::ATOMSWAP64_DAG:
    return SelectAtomic64(Node, X86::ATOMSWAP6432);

  case ISD::ATOMIC_LOAD_ADD: {
    SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
    if (RetVal)
      return RetVal;
    // No "lock" form matched; fall through to the generated matcher.
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    // x86 widening multiply produces its result in the fixed register pair
    // Lo/Hi (AL/AH, AX/DX, EAX/EDX, RAX/RDX), so this must be matched by
    // hand with explicit CopyToReg/CopyFromReg and a flag (glue) chain.
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }
    }

    unsigned LoReg, HiReg;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative, so if the load didn't fold as the second
    // operand, try it as the first and swap.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    // Place the non-memory operand into the fixed Lo register; the flag
    // result glues it to the multiply.
    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
                                          N0, SDValue()).getValue(1);

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the low half of the result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }
    // Copy the high half of the result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                                Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
                                                MVT::i8, Result);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    // Like the multiplies above: x86 div/idiv reads its dividend from the
    // fixed Lo/Hi register pair and writes quotient/remainder back there.
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }
    }

    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  HiReg = X86::AH;
      ClrOpcode  = 0;
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrOpcode  = X86::MOV16r0;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; HiReg = X86::EDX;
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode  = ~0U; // NOT USED.
      SExtOpcode = X86::CQO;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
                                         MVT::Other, Ops,
                                         array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode;

        if (NVT.getSimpleVT() == MVT::i64) {
          ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
                            0);
          // We just did a 32-bit clear, insert it into a 64-bit register to
          // clear the whole 64-bit reg.
          SDValue Undef =
            SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
                                           dl, MVT::i64), 0);
          SDValue SubRegNo =
            CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
          ClrNode =
            SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
                                           MVT::i64, Undef, ClrNode, SubRegNo),
                    0);
        } else {
          ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
        }

        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the division (low) result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }
    // Copy the remainder (high) result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                      Result,
                                      CurDAG->getTargetConstant(8, MVT::i8)),
                         0);
        // Then truncate it down to i8.
        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
                                                MVT::i8, Result);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case X86ISD::CMP: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding. Each narrowing below is only legal when the
    // immediate's sign bit (at the narrow width) is clear, or when no user
    // of the flags reads SF/OF (HasNoSignedComparisonUses).
    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8"
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          TargetRegisterClass *TRC = 0;
          switch (N0.getValueType().getSimpleVT().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.
        TargetRegisterClass *TRC = 0;
        switch (N0.getValueType().getSimpleVT().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
                                                        MVT::i8, Reg);

        // Emit a testb. No special NOREX tricks are needed since there's
        // only one GPR operand!
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                      Subreg, ShiftedImm);
      }

      // For example, "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
      }
    }
    break;
  }
  }

  // No custom match fired; use the TableGen-generated matcher.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DEBUG({
      errs() << std::string(Indent-2, ' ') << "=> ";
      if (ResNode == NULL || ResNode == N.getNode())
        N.getNode()->dump(CurDAG);
      else
        ResNode->dump(CurDAG);
      errs() << '\n';
    });
  Indent -= 2;
#endif

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Expand an inline-asm memory-constraint
/// operand ('m') into the five x86 address operands; returns true on failure
/// (unsupported constraint or unmatchable address).
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}