X86ISelDAGToDAG.cpp revision 199989
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

// Force NDEBUG on in any optimized build on Darwin.
//
// FIXME: This is a huge hack, to work around ridiculously awful compile times
// on this file with gcc-4.2 on Darwin, in Release mode.
#if (!defined(__llvm__) && defined(__APPLE__) && \
     defined(__OPTIMIZE__) && !defined(NDEBUG))
#define NDEBUG
#endif

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

#include "llvm/Support/CommandLine.h"
static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;
      int FrameIndex;
    } Base;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    GlobalValue *GV;
    Constant *CP;
    BlockAddress *BlockAddr;
    const char *ES;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
        SymbolFlags(X86II::MO_NO_FLAG) {
    }

    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
    }

    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base.Reg = Reg;
    }

    void dump() {
      errs() << "X86ISelAddressMode " << this << '\n';
      errs() << "Base.Reg ";
      if (Base.Reg.getNode() != 0)
        Base.Reg.getNode()->dump();
      else
        errs() << "nul";
      errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode() != 0)
        IndexReg.getNode()->dump();
      else
        errs() << "nul";
      errs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        errs() << "nul";
      errs() << " CP ";
      if (CP)
        CP->dump();
      else
        errs() << "nul";
      errs() << '\n'
             << "ES ";
      if (ES)
        errs() << ES;
      else
        errs() << "nul";
      errs() << " JT" << JT << " Align" << Align << '\n';
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelect - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelect();

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual
    bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDValue N);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);

    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
                             SDValue N, SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &InChain, SDValue &OutChain);
    bool TryFoldLoad(SDValue P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
    void PreprocessForRMW();
    void PreprocessForFPConvert();

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else if (AM.BlockAddr)
        Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
                                       true, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register.
    /// Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }

#ifndef NDEBUG
    unsigned Indent;
#endif
  };
}


bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
                                                 SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (U == Root)
    switch (U->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      //   movl 4(%esp), %eax
      //   addl $4, %eax
      // vs.
      //   movl $4, %eax
      //   addl 4(%esp), %eax
      // The former is 2 bytes shorter. When the increment is 1, the saving
      // can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      //   movl %gs:0, %eax
      //   leal i@NTPOFF(%eax), %eax
      // instead of
      //   movl $i@NTPOFF, %eax
      //   addl %gs:0, %eax
      // If the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }

  // Proceed to 'generic' cycle finder code
  return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
                                 SDValue Store, SDValue TF) {
  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
    if (Load.getNode() == TF.getOperand(i).getNode())
      Ops.push_back(Load.getOperand(0));
    else
      Ops.push_back(TF.getOperand(i));
  SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
  SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
                                               Load.getOperand(1),
                                               Load.getOperand(2));
  CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
                             Store.getOperand(2), Store.getOperand(3));
}

/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The
/// chain produced by the load must only be used by the store's chain operand,
/// otherwise this may produce a cycle in the DAG.
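/// (Illustrative note, not from the original comments.) A typical candidate is
/// the load in (store (op (load Addr), Val), Addr), which can then be selected
/// together with the op and the store as a single memory-operand instruction
/// such as "addl %reg, (Addr)".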
///
static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
                      SDValue &Load) {
  if (N.getOpcode() == ISD::BIT_CONVERT)
    N = N.getOperand(0);

  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD || LD->isVolatile())
    return false;
  if (LD->getAddressingMode() != ISD::UNINDEXED)
    return false;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
    return false;

  if (N.hasOneUse() &&
      LD->hasNUsesOfValue(1, 1) &&
      N.getOperand(1) == Address &&
      LD->isOperandOf(Chain.getNode())) {
    Load = N;
    return true;
  }
  return false;
}

/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
                                  SDValue Call, SDValue CallSeqStart) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = CallSeqStart.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected CallSeqStart chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
    Ops.push_back(CallSeqStart.getOperand(i));
  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}


/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -O0 mode.
/// This allows the instruction selector to pick more read-modify-write
/// instructions.
/// This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact that the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {
    if (I->getOpcode() == X86ISD::CALL) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      SDValue Chain = I->getOperand(0);
      SDValue Load  = I->getOperand(1);
      if (!isCalleeLoad(Load, Chain))
        continue;
      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDValue Chain = I->getOperand(0);

    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
      continue;

    SDValue N1 = I->getOperand(1);
    SDValue N2 = I->getOperand(2);
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDValue Load;
    unsigned Opcode = N1.getNode()->getOpcode();
    switch (Opcode) {
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::VECTOR_SHUFFLE: {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      if (!RModW)
        RModW = isRMWLoad(N11, Chain, N2, Load);
      break;
    }
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDValue N10 = N1.getOperand(0);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      break;
    }
    }

    if (RModW) {
      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the DAG, lowering fpround and fpextend
/// nodes that target the FP stack into a store and load through a stack slot.
/// This is a gross hack. We would like to simply mark these as being illegal,
/// but when we do that, legalize produces these when it expands calls, then
/// expands these in the same legalize pass. We would like dag combine to be
/// able to hack on these between the call expansion and the node legalization.
/// As such this pass basically does "really late" legalization of these inline
/// with the X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
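    // Only FP_ROUND and FP_EXTEND nodes are of interest to this pass; every
    // other node is left untouched.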
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    EVT SrcVT = N->getOperand(0).getValueType();
    EVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    EVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// InstructionSelect - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
  const Function *F = MF->getFunction();
  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);

  if (OptLevel != CodeGenOpt::None)
    PreprocessForRMW();

  // FIXME: This should only happen when not compiled with -O0.
  PreprocessForFPConvert();

  // Codegen the basic block.
#ifndef NDEBUG
  DEBUG(errs() << "===== Instruction selection begins:\n");
  Indent = 0;
#endif
  SelectRoot(*CurDAG);
#ifndef NDEBUG
  DEBUG(errs() << "===== Instruction selection ends:\n");
#endif

  CurDAG->RemoveDeadNodes();
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, DebugLoc::getUnknownLoc(),
            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
704 MachineBasicBlock *BB = MF.begin(); 705 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 706 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 707} 708 709 710bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, 711 X86ISelAddressMode &AM) { 712 assert(N.getOpcode() == X86ISD::SegmentBaseAddress); 713 SDValue Segment = N.getOperand(0); 714 715 if (AM.Segment.getNode() == 0) { 716 AM.Segment = Segment; 717 return false; 718 } 719 720 return true; 721} 722 723bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { 724 // This optimization is valid because the GNU TLS model defines that 725 // gs:0 (or fs:0 on X86-64) contains its own address. 726 // For more information see http://people.redhat.com/drepper/tls.pdf 727 728 SDValue Address = N.getOperand(1); 729 if (Address.getOpcode() == X86ISD::SegmentBaseAddress && 730 !MatchSegmentBaseAddress (Address, AM)) 731 return false; 732 733 return true; 734} 735 736/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes 737/// into an addressing mode. These wrap things that will resolve down into a 738/// symbol reference. If no match is possible, this returns true, otherwise it 739/// returns false. 740bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { 741 // If the addressing mode already has a symbol as the displacement, we can 742 // never match another symbol. 743 if (AM.hasSymbolicDisplacement()) 744 return true; 745 746 SDValue N0 = N.getOperand(0); 747 CodeModel::Model M = TM.getCodeModel(); 748 749 // Handle X86-64 rip-relative addresses. We check this before checking direct 750 // folding because RIP is preferable to non-RIP accesses. 751 if (Subtarget->is64Bit() && 752 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 753 // they cannot be folded into immediate fields. 754 // FIXME: This can be improved for kernel and other models? 755 (M == CodeModel::Small || M == CodeModel::Kernel) && 756 // Base and index reg must be 0 in order to use %rip as base and lowering 757 // must allow RIP. 758 !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { 759 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 760 int64_t Offset = AM.Disp + G->getOffset(); 761 if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; 762 AM.GV = G->getGlobal(); 763 AM.Disp = Offset; 764 AM.SymbolFlags = G->getTargetFlags(); 765 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 766 int64_t Offset = AM.Disp + CP->getOffset(); 767 if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; 768 AM.CP = CP->getConstVal(); 769 AM.Align = CP->getAlignment(); 770 AM.Disp = Offset; 771 AM.SymbolFlags = CP->getTargetFlags(); 772 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 773 AM.ES = S->getSymbol(); 774 AM.SymbolFlags = S->getTargetFlags(); 775 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 776 AM.JT = J->getIndex(); 777 AM.SymbolFlags = J->getTargetFlags(); 778 } else { 779 AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); 780 AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); 781 } 782 783 if (N.getOpcode() == X86ISD::WrapperRIP) 784 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 785 return false; 786 } 787 788 // Handle the case when globals fit in our immediate field: This is true for 789 // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit 790 // mode, this results in a non-RIP-relative computation. 
791 if (!Subtarget->is64Bit() || 792 ((M == CodeModel::Small || M == CodeModel::Kernel) && 793 TM.getRelocationModel() == Reloc::Static)) { 794 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 795 AM.GV = G->getGlobal(); 796 AM.Disp += G->getOffset(); 797 AM.SymbolFlags = G->getTargetFlags(); 798 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 799 AM.CP = CP->getConstVal(); 800 AM.Align = CP->getAlignment(); 801 AM.Disp += CP->getOffset(); 802 AM.SymbolFlags = CP->getTargetFlags(); 803 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 804 AM.ES = S->getSymbol(); 805 AM.SymbolFlags = S->getTargetFlags(); 806 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 807 AM.JT = J->getIndex(); 808 AM.SymbolFlags = J->getTargetFlags(); 809 } else { 810 AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); 811 AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); 812 } 813 return false; 814 } 815 816 return true; 817} 818 819/// MatchAddress - Add the specified node to the specified addressing mode, 820/// returning true if it cannot be done. This just pattern matches for the 821/// addressing mode. 822bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { 823 if (MatchAddressRecursively(N, AM, 0)) 824 return true; 825 826 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has 827 // a smaller encoding and avoids a scaled-index. 828 if (AM.Scale == 2 && 829 AM.BaseType == X86ISelAddressMode::RegBase && 830 AM.Base.Reg.getNode() == 0) { 831 AM.Base.Reg = AM.IndexReg; 832 AM.Scale = 1; 833 } 834 835 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, 836 // because it has a smaller encoding. 837 // TODO: Which other code models can use this? 838 if (TM.getCodeModel() == CodeModel::Small && 839 Subtarget->is64Bit() && 840 AM.Scale == 1 && 841 AM.BaseType == X86ISelAddressMode::RegBase && 842 AM.Base.Reg.getNode() == 0 && 843 AM.IndexReg.getNode() == 0 && 844 AM.SymbolFlags == X86II::MO_NO_FLAG && 845 AM.hasSymbolicDisplacement()) 846 AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); 847 848 return false; 849} 850 851bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 852 unsigned Depth) { 853 bool is64Bit = Subtarget->is64Bit(); 854 DebugLoc dl = N.getDebugLoc(); 855 DEBUG({ 856 errs() << "MatchAddress: "; 857 AM.dump(); 858 }); 859 // Limit recursion. 860 if (Depth > 5) 861 return MatchAddressBase(N, AM); 862 863 CodeModel::Model M = TM.getCodeModel(); 864 865 // If this is already a %rip relative address, we can only merge immediates 866 // into it. Instead of handling this in every case, we handle it here. 867 // RIP relative addressing: %rip + 32-bit displacement! 868 if (AM.isRIPRelative()) { 869 // FIXME: JumpTable and ExternalSymbol address currently don't like 870 // displacements. It isn't very important, but this should be fixed for 871 // consistency. 
872 if (!AM.ES && AM.JT != -1) return true; 873 874 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { 875 int64_t Val = AM.Disp + Cst->getSExtValue(); 876 if (X86::isOffsetSuitableForCodeModel(Val, M, 877 AM.hasSymbolicDisplacement())) { 878 AM.Disp = Val; 879 return false; 880 } 881 } 882 return true; 883 } 884 885 switch (N.getOpcode()) { 886 default: break; 887 case ISD::Constant: { 888 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 889 if (!is64Bit || 890 X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, 891 AM.hasSymbolicDisplacement())) { 892 AM.Disp += Val; 893 return false; 894 } 895 break; 896 } 897 898 case X86ISD::SegmentBaseAddress: 899 if (!MatchSegmentBaseAddress(N, AM)) 900 return false; 901 break; 902 903 case X86ISD::Wrapper: 904 case X86ISD::WrapperRIP: 905 if (!MatchWrapper(N, AM)) 906 return false; 907 break; 908 909 case ISD::LOAD: 910 if (!MatchLoad(N, AM)) 911 return false; 912 break; 913 914 case ISD::FrameIndex: 915 if (AM.BaseType == X86ISelAddressMode::RegBase 916 && AM.Base.Reg.getNode() == 0) { 917 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 918 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 919 return false; 920 } 921 break; 922 923 case ISD::SHL: 924 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) 925 break; 926 927 if (ConstantSDNode 928 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { 929 unsigned Val = CN->getZExtValue(); 930 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so 931 // that the base operand remains free for further matching. If 932 // the base doesn't end up getting used, a post-processing step 933 // in MatchAddress turns (,x,2) into (x,x), which is cheaper. 934 if (Val == 1 || Val == 2 || Val == 3) { 935 AM.Scale = 1 << Val; 936 SDValue ShVal = N.getNode()->getOperand(0); 937 938 // Okay, we know that we have a scale by now. However, if the scaled 939 // value is an add of something and a constant, we can fold the 940 // constant into the disp field here. 941 if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 942 isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { 943 AM.IndexReg = ShVal.getNode()->getOperand(0); 944 ConstantSDNode *AddVal = 945 cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); 946 uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); 947 if (!is64Bit || 948 X86::isOffsetSuitableForCodeModel(Disp, M, 949 AM.hasSymbolicDisplacement())) 950 AM.Disp = Disp; 951 else 952 AM.IndexReg = ShVal; 953 } else { 954 AM.IndexReg = ShVal; 955 } 956 return false; 957 } 958 break; 959 } 960 961 case ISD::SMUL_LOHI: 962 case ISD::UMUL_LOHI: 963 // A mul_lohi where we need the low part can be folded as a plain multiply. 964 if (N.getResNo() != 0) break; 965 // FALL THROUGH 966 case ISD::MUL: 967 case X86ISD::MUL_IMM: 968 // X*[3,5,9] -> X+X*[2,4,8] 969 if (AM.BaseType == X86ISelAddressMode::RegBase && 970 AM.Base.Reg.getNode() == 0 && 971 AM.IndexReg.getNode() == 0) { 972 if (ConstantSDNode 973 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) 974 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || 975 CN->getZExtValue() == 9) { 976 AM.Scale = unsigned(CN->getZExtValue())-1; 977 978 SDValue MulVal = N.getNode()->getOperand(0); 979 SDValue Reg; 980 981 // Okay, we know that we have a scale by now. However, if the scaled 982 // value is an add of something and a constant, we can fold the 983 // constant into the disp field here. 
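          // Illustrative instance (not in the original comments):
          // (x + 12) * 5 becomes base = x, index = x, scale = 4, disp = 60,
          // i.e. x + x*4 + 60.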
984 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 985 isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) { 986 Reg = MulVal.getNode()->getOperand(0); 987 ConstantSDNode *AddVal = 988 cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); 989 uint64_t Disp = AM.Disp + AddVal->getSExtValue() * 990 CN->getZExtValue(); 991 if (!is64Bit || 992 X86::isOffsetSuitableForCodeModel(Disp, M, 993 AM.hasSymbolicDisplacement())) 994 AM.Disp = Disp; 995 else 996 Reg = N.getNode()->getOperand(0); 997 } else { 998 Reg = N.getNode()->getOperand(0); 999 } 1000 1001 AM.IndexReg = AM.Base.Reg = Reg; 1002 return false; 1003 } 1004 } 1005 break; 1006 1007 case ISD::SUB: { 1008 // Given A-B, if A can be completely folded into the address and 1009 // the index field with the index field unused, use -B as the index. 1010 // This is a win if a has multiple parts that can be folded into 1011 // the address. Also, this saves a mov if the base register has 1012 // other uses, since it avoids a two-address sub instruction, however 1013 // it costs an additional mov if the index register has other uses. 1014 1015 // Test if the LHS of the sub can be folded. 1016 X86ISelAddressMode Backup = AM; 1017 if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { 1018 AM = Backup; 1019 break; 1020 } 1021 // Test if the index field is free for use. 1022 if (AM.IndexReg.getNode() || AM.isRIPRelative()) { 1023 AM = Backup; 1024 break; 1025 } 1026 int Cost = 0; 1027 SDValue RHS = N.getNode()->getOperand(1); 1028 // If the RHS involves a register with multiple uses, this 1029 // transformation incurs an extra mov, due to the neg instruction 1030 // clobbering its operand. 1031 if (!RHS.getNode()->hasOneUse() || 1032 RHS.getNode()->getOpcode() == ISD::CopyFromReg || 1033 RHS.getNode()->getOpcode() == ISD::TRUNCATE || 1034 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || 1035 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && 1036 RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) 1037 ++Cost; 1038 // If the base is a register with multiple uses, this 1039 // transformation may save a mov. 1040 if ((AM.BaseType == X86ISelAddressMode::RegBase && 1041 AM.Base.Reg.getNode() && 1042 !AM.Base.Reg.getNode()->hasOneUse()) || 1043 AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1044 --Cost; 1045 // If the folded LHS was interesting, this transformation saves 1046 // address arithmetic. 1047 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + 1048 ((AM.Disp != 0) && (Backup.Disp == 0)) + 1049 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) 1050 --Cost; 1051 // If it doesn't look like it may be an overall win, don't do it. 1052 if (Cost >= 0) { 1053 AM = Backup; 1054 break; 1055 } 1056 1057 // Ok, the transformation is legal and appears profitable. Go for it. 1058 SDValue Zero = CurDAG->getConstant(0, N.getValueType()); 1059 SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); 1060 AM.IndexReg = Neg; 1061 AM.Scale = 1; 1062 1063 // Insert the new nodes into the topological ordering. 
1064 if (Zero.getNode()->getNodeId() == -1 || 1065 Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1066 CurDAG->RepositionNode(N.getNode(), Zero.getNode()); 1067 Zero.getNode()->setNodeId(N.getNode()->getNodeId()); 1068 } 1069 if (Neg.getNode()->getNodeId() == -1 || 1070 Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1071 CurDAG->RepositionNode(N.getNode(), Neg.getNode()); 1072 Neg.getNode()->setNodeId(N.getNode()->getNodeId()); 1073 } 1074 return false; 1075 } 1076 1077 case ISD::ADD: { 1078 X86ISelAddressMode Backup = AM; 1079 if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && 1080 !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) 1081 return false; 1082 AM = Backup; 1083 if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && 1084 !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) 1085 return false; 1086 AM = Backup; 1087 1088 // If we couldn't fold both operands into the address at the same time, 1089 // see if we can just put each operand into a register and fold at least 1090 // the add. 1091 if (AM.BaseType == X86ISelAddressMode::RegBase && 1092 !AM.Base.Reg.getNode() && 1093 !AM.IndexReg.getNode()) { 1094 AM.Base.Reg = N.getNode()->getOperand(0); 1095 AM.IndexReg = N.getNode()->getOperand(1); 1096 AM.Scale = 1; 1097 return false; 1098 } 1099 break; 1100 } 1101 1102 case ISD::OR: 1103 // Handle "X | C" as "X + C" iff X is known to have C bits clear. 1104 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1105 X86ISelAddressMode Backup = AM; 1106 uint64_t Offset = CN->getSExtValue(); 1107 // Start with the LHS as an addr mode. 1108 if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && 1109 // Address could not have picked a GV address for the displacement. 1110 AM.GV == NULL && 1111 // On x86-64, the resultant disp must fit in 32-bits. 1112 (!is64Bit || 1113 X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, 1114 AM.hasSymbolicDisplacement())) && 1115 // Check to see if the LHS & C is zero. 1116 CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { 1117 AM.Disp += Offset; 1118 return false; 1119 } 1120 AM = Backup; 1121 } 1122 break; 1123 1124 case ISD::AND: { 1125 // Perform some heroic transforms on an and of a constant-count shift 1126 // with a constant to enable use of the scaled offset field. 1127 1128 SDValue Shift = N.getOperand(0); 1129 if (Shift.getNumOperands() != 2) break; 1130 1131 // Scale must not be used already. 1132 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; 1133 1134 SDValue X = Shift.getOperand(0); 1135 ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1136 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 1137 if (!C1 || !C2) break; 1138 1139 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This 1140 // allows us to convert the shift and and into an h-register extract and 1141 // a scaled index. 
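    // Illustrative instance (not in the original comments): with C1 == 6 the
    // expression (X >> 6) & 0x3fc equals ((X >> 8) & 0xff) << 2, so the
    // h-register extract (X >> 8) & 0xff can serve as the index with scale 4.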
1142 if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { 1143 unsigned ScaleLog = 8 - C1->getZExtValue(); 1144 if (ScaleLog > 0 && ScaleLog < 4 && 1145 C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { 1146 SDValue Eight = CurDAG->getConstant(8, MVT::i8); 1147 SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); 1148 SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1149 X, Eight); 1150 SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), 1151 Srl, Mask); 1152 SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); 1153 SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1154 And, ShlCount); 1155 1156 // Insert the new nodes into the topological ordering. 1157 if (Eight.getNode()->getNodeId() == -1 || 1158 Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1159 CurDAG->RepositionNode(X.getNode(), Eight.getNode()); 1160 Eight.getNode()->setNodeId(X.getNode()->getNodeId()); 1161 } 1162 if (Mask.getNode()->getNodeId() == -1 || 1163 Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1164 CurDAG->RepositionNode(X.getNode(), Mask.getNode()); 1165 Mask.getNode()->setNodeId(X.getNode()->getNodeId()); 1166 } 1167 if (Srl.getNode()->getNodeId() == -1 || 1168 Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1169 CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); 1170 Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1171 } 1172 if (And.getNode()->getNodeId() == -1 || 1173 And.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1174 CurDAG->RepositionNode(N.getNode(), And.getNode()); 1175 And.getNode()->setNodeId(N.getNode()->getNodeId()); 1176 } 1177 if (ShlCount.getNode()->getNodeId() == -1 || 1178 ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1179 CurDAG->RepositionNode(X.getNode(), ShlCount.getNode()); 1180 ShlCount.getNode()->setNodeId(N.getNode()->getNodeId()); 1181 } 1182 if (Shl.getNode()->getNodeId() == -1 || 1183 Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1184 CurDAG->RepositionNode(N.getNode(), Shl.getNode()); 1185 Shl.getNode()->setNodeId(N.getNode()->getNodeId()); 1186 } 1187 CurDAG->ReplaceAllUsesWith(N, Shl); 1188 AM.IndexReg = And; 1189 AM.Scale = (1 << ScaleLog); 1190 return false; 1191 } 1192 } 1193 1194 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this 1195 // allows us to fold the shift into this addressing mode. 1196 if (Shift.getOpcode() != ISD::SHL) break; 1197 1198 // Not likely to be profitable if either the AND or SHIFT node has more 1199 // than one use (unless all uses are for address computation). Besides, 1200 // isel mechanism requires their node ids to be reused. 1201 if (!N.hasOneUse() || !Shift.hasOneUse()) 1202 break; 1203 1204 // Verify that the shift amount is something we can fold. 1205 unsigned ShiftCst = C1->getZExtValue(); 1206 if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) 1207 break; 1208 1209 // Get the new AND mask, this folds to a constant. 1210 SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1211 SDValue(C2, 0), SDValue(C1, 0)); 1212 SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, 1213 NewANDMask); 1214 SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1215 NewAND, SDValue(C1, 0)); 1216 1217 // Insert the new nodes into the topological ordering. 
1218 if (C1->getNodeId() > X.getNode()->getNodeId()) { 1219 CurDAG->RepositionNode(X.getNode(), C1); 1220 C1->setNodeId(X.getNode()->getNodeId()); 1221 } 1222 if (NewANDMask.getNode()->getNodeId() == -1 || 1223 NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1224 CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode()); 1225 NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId()); 1226 } 1227 if (NewAND.getNode()->getNodeId() == -1 || 1228 NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1229 CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode()); 1230 NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1231 } 1232 if (NewSHIFT.getNode()->getNodeId() == -1 || 1233 NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1234 CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode()); 1235 NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); 1236 } 1237 1238 CurDAG->ReplaceAllUsesWith(N, NewSHIFT); 1239 1240 AM.Scale = 1 << ShiftCst; 1241 AM.IndexReg = NewAND; 1242 return false; 1243 } 1244 } 1245 1246 return MatchAddressBase(N, AM); 1247} 1248 1249/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the 1250/// specified addressing mode without any further recursion. 1251bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { 1252 // Is the base register already occupied? 1253 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { 1254 // If so, check to see if the scale index register is set. 1255 if (AM.IndexReg.getNode() == 0) { 1256 AM.IndexReg = N; 1257 AM.Scale = 1; 1258 return false; 1259 } 1260 1261 // Otherwise, we cannot select it. 1262 return true; 1263 } 1264 1265 // Default, generate it as a register. 1266 AM.BaseType = X86ISelAddressMode::RegBase; 1267 AM.Base.Reg = N; 1268 return false; 1269} 1270 1271/// SelectAddr - returns true if it is able pattern match an addressing mode. 1272/// It returns the operands which make up the maximal addressing mode it can 1273/// match by reference. 1274bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, 1275 SDValue &Scale, SDValue &Index, 1276 SDValue &Disp, SDValue &Segment) { 1277 X86ISelAddressMode AM; 1278 bool Done = false; 1279 if (AvoidDupAddrCompute && !N.hasOneUse()) { 1280 unsigned Opcode = N.getOpcode(); 1281 if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex && 1282 Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { 1283 // If we are able to fold N into addressing mode, then we'll allow it even 1284 // if N has multiple uses. In general, addressing computation is used as 1285 // addresses by all of its uses. But watch out for CopyToReg uses, that 1286 // means the address computation is liveout. It will be computed by a LEA 1287 // so we want to avoid computing the address twice. 
1288 for (SDNode::use_iterator UI = N.getNode()->use_begin(), 1289 UE = N.getNode()->use_end(); UI != UE; ++UI) { 1290 if (UI->getOpcode() == ISD::CopyToReg) { 1291 MatchAddressBase(N, AM); 1292 Done = true; 1293 break; 1294 } 1295 } 1296 } 1297 } 1298 1299 if (!Done && MatchAddress(N, AM)) 1300 return false; 1301 1302 EVT VT = N.getValueType(); 1303 if (AM.BaseType == X86ISelAddressMode::RegBase) { 1304 if (!AM.Base.Reg.getNode()) 1305 AM.Base.Reg = CurDAG->getRegister(0, VT); 1306 } 1307 1308 if (!AM.IndexReg.getNode()) 1309 AM.IndexReg = CurDAG->getRegister(0, VT); 1310 1311 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1312 return true; 1313} 1314 1315/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 1316/// match a load whose top elements are either undef or zeros. The load flavor 1317/// is derived from the type of N, which is either v4f32 or v2f64. 1318bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, 1319 SDValue N, SDValue &Base, 1320 SDValue &Scale, SDValue &Index, 1321 SDValue &Disp, SDValue &Segment, 1322 SDValue &InChain, 1323 SDValue &OutChain) { 1324 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1325 InChain = N.getOperand(0).getValue(1); 1326 if (ISD::isNON_EXTLoad(InChain.getNode()) && 1327 InChain.getValue(0).hasOneUse() && 1328 N.hasOneUse() && 1329 IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) { 1330 LoadSDNode *LD = cast<LoadSDNode>(InChain); 1331 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1332 return false; 1333 OutChain = LD->getChain(); 1334 return true; 1335 } 1336 } 1337 1338 // Also handle the case where we explicitly require zeros in the top 1339 // elements. This is a vector shuffle from the zero vector. 1340 if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1341 // Check to see if the top elements are all zeros (or bitcast of zeros). 1342 N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1343 N.getOperand(0).getNode()->hasOneUse() && 1344 ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1345 N.getOperand(0).getOperand(0).hasOneUse()) { 1346 // Okay, this is a zero extending load. Fold it. 1347 LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1348 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1349 return false; 1350 OutChain = LD->getChain(); 1351 InChain = SDValue(LD, 1); 1352 return true; 1353 } 1354 return false; 1355} 1356 1357 1358/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1359/// mode it matches can be cost effectively emitted as an LEA instruction. 1360bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, 1361 SDValue &Base, SDValue &Scale, 1362 SDValue &Index, SDValue &Disp) { 1363 X86ISelAddressMode AM; 1364 1365 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support 1366 // segments. 
1367 SDValue Copy = AM.Segment; 1368 SDValue T = CurDAG->getRegister(0, MVT::i32); 1369 AM.Segment = T; 1370 if (MatchAddress(N, AM)) 1371 return false; 1372 assert (T == AM.Segment); 1373 AM.Segment = Copy; 1374 1375 EVT VT = N.getValueType(); 1376 unsigned Complexity = 0; 1377 if (AM.BaseType == X86ISelAddressMode::RegBase) 1378 if (AM.Base.Reg.getNode()) 1379 Complexity = 1; 1380 else 1381 AM.Base.Reg = CurDAG->getRegister(0, VT); 1382 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1383 Complexity = 4; 1384 1385 if (AM.IndexReg.getNode()) 1386 Complexity++; 1387 else 1388 AM.IndexReg = CurDAG->getRegister(0, VT); 1389 1390 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1391 // a simple shift. 1392 if (AM.Scale > 1) 1393 Complexity++; 1394 1395 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1396 // to a LEA. This is determined with some expermentation but is by no means 1397 // optimal (especially for code size consideration). LEA is nice because of 1398 // its three-address nature. Tweak the cost function again when we can run 1399 // convertToThreeAddress() at register allocation time. 1400 if (AM.hasSymbolicDisplacement()) { 1401 // For X86-64, we should always use lea to materialize RIP relative 1402 // addresses. 1403 if (Subtarget->is64Bit()) 1404 Complexity = 4; 1405 else 1406 Complexity += 2; 1407 } 1408 1409 if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) 1410 Complexity++; 1411 1412 // If it isn't worth using an LEA, reject it. 1413 if (Complexity <= 2) 1414 return false; 1415 1416 SDValue Segment; 1417 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1418 return true; 1419} 1420 1421/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 1422bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 1423 SDValue &Scale, SDValue &Index, 1424 SDValue &Disp) { 1425 assert(Op.getOpcode() == X86ISD::TLSADDR); 1426 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1427 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1428 1429 X86ISelAddressMode AM; 1430 AM.GV = GA->getGlobal(); 1431 AM.Disp += GA->getOffset(); 1432 AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); 1433 AM.SymbolFlags = GA->getTargetFlags(); 1434 1435 if (N.getValueType() == MVT::i32) { 1436 AM.Scale = 1; 1437 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1438 } else { 1439 AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1440 } 1441 1442 SDValue Segment; 1443 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1444 return true; 1445} 1446 1447 1448bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, 1449 SDValue &Base, SDValue &Scale, 1450 SDValue &Index, SDValue &Disp, 1451 SDValue &Segment) { 1452 if (ISD::isNON_EXTLoad(N.getNode()) && 1453 N.hasOneUse() && 1454 IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) 1455 return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); 1456 return false; 1457} 1458 1459/// getGlobalBaseReg - Return an SDNode that returns the value of 1460/// the global base register. Output instructions required to 1461/// initialize the global base register, if necessary. 
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}

static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).getNode());
}

SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = Node->getOperand(2);
  SDValue In2H = Node->getOperand(3);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
                                           MVT::i32, MVT::i32, MVT::Other, Ops,
                                           array_lengthof(Ops));
  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
  return ResNode;
}

SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
  if (Node->hasAnyUseOfValue(0))
    return 0;

  // Optimize common patterns for __sync_add_and_fetch and
  // __sync_sub_and_fetch where the result is not used. This allows us
  // to use "lock" version of add, sub, inc, dec instructions.
  // FIXME: Do not use special instructions but instead add the "lock"
  // prefix to the target node somehow. The extra information will then be
  // transferred to machine instruction and it denotes the prefix.
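  // Illustrative instance (not in the original comments): an unused
  // __sync_add_and_fetch(&x, 1) can be emitted as "lock incl (mem)", and an
  // unused __sync_sub_and_fetch(&x, imm) as "lock subl $imm, (mem)".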
1503 SDValue Chain = Node->getOperand(0); 1504 SDValue Ptr = Node->getOperand(1); 1505 SDValue Val = Node->getOperand(2); 1506 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1507 if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1508 return 0; 1509 1510 bool isInc = false, isDec = false, isSub = false, isCN = false; 1511 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); 1512 if (CN) { 1513 isCN = true; 1514 int64_t CNVal = CN->getSExtValue(); 1515 if (CNVal == 1) 1516 isInc = true; 1517 else if (CNVal == -1) 1518 isDec = true; 1519 else if (CNVal >= 0) 1520 Val = CurDAG->getTargetConstant(CNVal, NVT); 1521 else { 1522 isSub = true; 1523 Val = CurDAG->getTargetConstant(-CNVal, NVT); 1524 } 1525 } else if (Val.hasOneUse() && 1526 Val.getOpcode() == ISD::SUB && 1527 X86::isZeroNode(Val.getOperand(0))) { 1528 isSub = true; 1529 Val = Val.getOperand(1); 1530 } 1531 1532 unsigned Opc = 0; 1533 switch (NVT.getSimpleVT().SimpleTy) { 1534 default: return 0; 1535 case MVT::i8: 1536 if (isInc) 1537 Opc = X86::LOCK_INC8m; 1538 else if (isDec) 1539 Opc = X86::LOCK_DEC8m; 1540 else if (isSub) { 1541 if (isCN) 1542 Opc = X86::LOCK_SUB8mi; 1543 else 1544 Opc = X86::LOCK_SUB8mr; 1545 } else { 1546 if (isCN) 1547 Opc = X86::LOCK_ADD8mi; 1548 else 1549 Opc = X86::LOCK_ADD8mr; 1550 } 1551 break; 1552 case MVT::i16: 1553 if (isInc) 1554 Opc = X86::LOCK_INC16m; 1555 else if (isDec) 1556 Opc = X86::LOCK_DEC16m; 1557 else if (isSub) { 1558 if (isCN) { 1559 if (Predicate_i16immSExt8(Val.getNode())) 1560 Opc = X86::LOCK_SUB16mi8; 1561 else 1562 Opc = X86::LOCK_SUB16mi; 1563 } else 1564 Opc = X86::LOCK_SUB16mr; 1565 } else { 1566 if (isCN) { 1567 if (Predicate_i16immSExt8(Val.getNode())) 1568 Opc = X86::LOCK_ADD16mi8; 1569 else 1570 Opc = X86::LOCK_ADD16mi; 1571 } else 1572 Opc = X86::LOCK_ADD16mr; 1573 } 1574 break; 1575 case MVT::i32: 1576 if (isInc) 1577 Opc = X86::LOCK_INC32m; 1578 else if (isDec) 1579 Opc = X86::LOCK_DEC32m; 1580 else if (isSub) { 1581 if (isCN) { 1582 if (Predicate_i32immSExt8(Val.getNode())) 1583 Opc = X86::LOCK_SUB32mi8; 1584 else 1585 Opc = X86::LOCK_SUB32mi; 1586 } else 1587 Opc = X86::LOCK_SUB32mr; 1588 } else { 1589 if (isCN) { 1590 if (Predicate_i32immSExt8(Val.getNode())) 1591 Opc = X86::LOCK_ADD32mi8; 1592 else 1593 Opc = X86::LOCK_ADD32mi; 1594 } else 1595 Opc = X86::LOCK_ADD32mr; 1596 } 1597 break; 1598 case MVT::i64: 1599 if (isInc) 1600 Opc = X86::LOCK_INC64m; 1601 else if (isDec) 1602 Opc = X86::LOCK_DEC64m; 1603 else if (isSub) { 1604 Opc = X86::LOCK_SUB64mr; 1605 if (isCN) { 1606 if (Predicate_i64immSExt8(Val.getNode())) 1607 Opc = X86::LOCK_SUB64mi8; 1608 else if (Predicate_i64immSExt32(Val.getNode())) 1609 Opc = X86::LOCK_SUB64mi32; 1610 } 1611 } else { 1612 Opc = X86::LOCK_ADD64mr; 1613 if (isCN) { 1614 if (Predicate_i64immSExt8(Val.getNode())) 1615 Opc = X86::LOCK_ADD64mi8; 1616 else if (Predicate_i64immSExt32(Val.getNode())) 1617 Opc = X86::LOCK_ADD64mi32; 1618 } 1619 } 1620 break; 1621 } 1622 1623 DebugLoc dl = Node->getDebugLoc(); 1624 SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, 1625 dl, NVT), 0); 1626 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1627 MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1628 if (isInc || isDec) { 1629 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; 1630 SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); 1631 cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1632 SDValue RetVals[] = { Undef, Ret }; 1633 return CurDAG->getMergeValues(RetVals, 2, 
  if (isInc || isDec) {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  } else {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  }
}

/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = N->use_begin(),
         UE = N->use_end(); UI != UE; ++UI) {
    // Only examine CopyToReg uses.
    if (UI->getOpcode() != ISD::CopyToReg)
      return false;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
        X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the opcode of the user.
      switch (FlagUI->getMachineOpcode()) {
      // These comparisons don't treat the most significant bit specially.
      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
      case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
      case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
      case X86::CMOVA16rr: case X86::CMOVA16rm:
      case X86::CMOVA32rr: case X86::CMOVA32rm:
      case X86::CMOVA64rr: case X86::CMOVA64rm:
      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
      case X86::CMOVB16rr: case X86::CMOVB16rm:
      case X86::CMOVB32rr: case X86::CMOVB32rm:
      case X86::CMOVB64rr: case X86::CMOVB64rm:
      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
      case X86::CMOVE16rr: case X86::CMOVE16rm:
      case X86::CMOVE32rr: case X86::CMOVE32rm:
      case X86::CMOVE64rr: case X86::CMOVE64rm:
      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
      case X86::CMOVP16rr: case X86::CMOVP16rm:
      case X86::CMOVP32rr: case X86::CMOVP32rm:
      case X86::CMOVP64rr: case X86::CMOVP64rm:
        continue;
      // Anything else: assume conservatively.
      default: return false;
      }
    }
  }
  return true;
}

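/// Select - Visit the given node and, for the opcodes handled specially
/// below, emit the matching machine node directly; everything else falls
/// through to the table-generated SelectCode.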
SDNode *X86DAGToDAGISel::Select(SDValue N) {
  SDNode *Node = N.getNode();
  EVT NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();
  DebugLoc dl = Node->getDebugLoc();

#ifndef NDEBUG
  DEBUG({
      errs() << std::string(Indent, ' ') << "Selecting: ";
      Node->dump(CurDAG);
      errs() << '\n';
    });
  Indent += 2;
#endif

  if (Node->isMachineOpcode()) {
#ifndef NDEBUG
    DEBUG({
        errs() << std::string(Indent-2, ' ') << "== ";
        Node->dump(CurDAG);
        errs() << '\n';
      });
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
  default: break;
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();

  case X86ISD::ATOMOR64_DAG:
    return SelectAtomic64(Node, X86::ATOMOR6432);
  case X86ISD::ATOMXOR64_DAG:
    return SelectAtomic64(Node, X86::ATOMXOR6432);
  case X86ISD::ATOMADD64_DAG:
    return SelectAtomic64(Node, X86::ATOMADD6432);
  case X86ISD::ATOMSUB64_DAG:
    return SelectAtomic64(Node, X86::ATOMSUB6432);
  case X86ISD::ATOMNAND64_DAG:
    return SelectAtomic64(Node, X86::ATOMNAND6432);
  case X86ISD::ATOMAND64_DAG:
    return SelectAtomic64(Node, X86::ATOMAND6432);
  case X86ISD::ATOMSWAP64_DAG:
    return SelectAtomic64(Node, X86::ATOMSWAP6432);

  case ISD::ATOMIC_LOAD_ADD: {
    SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
    if (RetVal)
      return RetVal;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }
    }

    unsigned LoReg, HiReg;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
                                          N0, SDValue()).getValue(1);

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the low half of the result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }
    // Copy the high half of the result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                                Result,
                                   CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
                                                MVT::i8, Result);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

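  // DIV/IDIV leave the quotient in the low register and the remainder in the
  // high register, so both results of the node are produced here.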
  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }
    }

    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL; HiReg = X86::AH;
      ClrOpcode = 0;
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX; HiReg = X86::DX;
      ClrOpcode = X86::MOV16r0;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; HiReg = X86::EDX;
      ClrOpcode = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode = ~0U; // NOT USED.
      SExtOpcode = X86::CQO;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
                                         MVT::Other, Ops,
                                         array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
        Chain = CurDAG->getEntryNode();
      }
      Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode;

        if (NVT.getSimpleVT() == MVT::i64) {
          ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
                            0);
          // We just did a 32-bit clear, insert it into a 64-bit register to
          // clear the whole 64-bit reg.
          SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
          SDValue SubRegNo =
            CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
          ClrNode =
            SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
                                           MVT::i64, Zero, ClrNode, SubRegNo),
                    0);
        } else {
          ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
        }

        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the division (low) result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }
    // Copy the remainder (high) result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                      Result,
                                      CurDAG->getTargetConstant(8, MVT::i8)),
                         0);
        // Then truncate it down to i8.
        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
                                                MVT::i8, Result);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DEBUG({
          errs() << std::string(Indent-2, ' ') << "=> ";
          Result.getNode()->dump(CurDAG);
          errs() << '\n';
        });
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case X86ISD::CMP: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8"
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          TargetRegisterClass *TRC = 0;
          switch (N0.getValueType().getSimpleVT().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.
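        // (Only the A, B, C and D general-purpose registers have an
        // addressable high-byte subregister.)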
        TargetRegisterClass *TRC = 0;
        switch (N0.getValueType().getSimpleVT().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
                                                        MVT::i8, Reg);

        // Emit a testb. No special NOREX tricks are needed since there's
        // only one GPR operand!
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                      Subreg, ShiftedImm);
      }

      // For example, "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
      }
    }
    break;
  }
  }

  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DEBUG({
      errs() << std::string(Indent-2, ' ') << "=> ";
      if (ResNode == NULL || ResNode == N.getNode())
        N.getNode()->dump(CurDAG);
      else
        ResNode->dump(CurDAG);
      errs() << '\n';
    });
  Indent -= 2;
#endif

  return ResNode;
}

bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o': // offsetable        ??
  case 'v': // not offsetable    ??
  default: return true;
  case 'm': // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}