X86ISelDAGToDAG.cpp — LLVM X86 DAG-to-DAG instruction selector, snapshot at revision 276479. Note: version-control revision annotations (e.g. "193323Sed") are fused onto the start of each original source line below.
1193323Sed//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file defines a DAG pattern matching instruction selector for X86, 11193323Sed// converting from a legalized dag to a X86 dag. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#include "X86.h" 16193323Sed#include "X86InstrBuilder.h" 17193323Sed#include "X86MachineFunctionInfo.h" 18193323Sed#include "X86RegisterInfo.h" 19193323Sed#include "X86Subtarget.h" 20193323Sed#include "X86TargetMachine.h" 21249423Sdim#include "llvm/ADT/Statistic.h" 22249423Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 23193323Sed#include "llvm/CodeGen/MachineFunction.h" 24193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 25193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 26193323Sed#include "llvm/CodeGen/SelectionDAGISel.h" 27249423Sdim#include "llvm/IR/Instructions.h" 28249423Sdim#include "llvm/IR/Intrinsics.h" 29249423Sdim#include "llvm/IR/Type.h" 30193323Sed#include "llvm/Support/Debug.h" 31198090Srdivacky#include "llvm/Support/ErrorHandling.h" 32193323Sed#include "llvm/Support/MathExtras.h" 33198090Srdivacky#include "llvm/Support/raw_ostream.h" 34249423Sdim#include "llvm/Target/TargetMachine.h" 35249423Sdim#include "llvm/Target/TargetOptions.h" 36193323Sedusing namespace llvm; 37193323Sed 38276479Sdim#define DEBUG_TYPE "x86-isel" 39276479Sdim 40193323SedSTATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 41193323Sed 42193323Sed//===----------------------------------------------------------------------===// 43193323Sed// Pattern Matcher Implementation 
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.  It accumulates the components of an x86 memory operand
  /// (base + scale*index + disp + segment) while MatchAddress* walks the DAG.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    // Only the member selected by BaseType is meaningful.
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    // At most one of the following symbolic displacements is set at a time
    // (see hasSymbolicDisplacement()).
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
        JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
    }

    /// hasSymbolicDisplacement - True if any symbolic operand (global,
    /// constant pool, external symbol, jump table, or block address) has
    /// already been folded into this addressing mode.
    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             JT != -1 || BlockAddr != nullptr;
    }

    /// hasBaseOrIndexReg - True if a base (register or frame index) or an
    /// index register has already been consumed.
    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    /// setBaseReg - Use Reg as the base, switching BaseType away from
    /// FrameIndexBase if necessary.
    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Debug-only pretty printer for the accumulated addressing mode.
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  ///
SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    const char *getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &TM.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

    // immSext8 predicate - True if the immediate fits in a sign-extended
    // 8-bit field.
    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N) override;
    SDNode *SelectGather(SDNode *N, unsigned Opc);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);

    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
    bool SelectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      char ConstraintCode,
                                      std::vector<SDValue> &OutOps) override;

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Materialize the components accumulated in AM into
    /// the five SDValue operands (Base, Scale, Index, Disp, Segment) that an
    /// x86 memory operand consists of.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
                                    getTargetLowering()->getPointerTy()) :
        AM.Base_Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      // A null Segment means "no segment override": use register 0.
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() const {
      return getTargetMachine().getInstrInfo();
    }
  };
}


// IsProfitableToFold - Returns false when folding the load N into its user
// would be a pessimization (or is disabled at -O0); true otherwise.
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  // A value with multiple uses must stay materialized; folding would
  // duplicate the work.
  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is a 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In case where the increment is 1, then
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl %gs:0, %eax
      // leal i@NTPOFF(%eax), %eax
      // instead of
      // movl $i@NTPOFF, %eax
      // addl %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  // Splice the load out of the chain feeding OrigChain: either the load is
  // the chain directly, or it is one operand of a TokenFactor.
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    // Rebuild the TokenFactor with the load replaced by the load's own
    // incoming chain.
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
    Ops.push_back(OrigChain.getOperand(i));
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  // Rechain the load onto the call's incoming chain (operands: chain, base
  // pointer, offset).
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  // Finally make the call's chain operand the load's output chain.
  unsigned NumOps = Call.getNode()->getNumOperands();
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = NumOps; i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  // Only a plain, non-volatile, unindexed, non-extending load is safe to
  // move and fold into the call.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  // The load must feed the chain either directly or through a TokenFactor
  // whose only other users are accounted for.
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->getAttributes().
    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only does this when target favors doesn't favor register indirect
        // call.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only does this if load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack.  This is a gross hack.  We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass.  We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(getTargetLowering());
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}


/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  // Cygwin/MinGW require main to call __main first (runtime init).
  if (Subtarget->isTargetCygMing()) {
    unsigned CallOp =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
    BuildMI(BB, DebugLoc(),
            TII->get(CallOp)).addExternalSymbol("__main");
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

// FoldOffsetIntoAddress - Try to add Offset to AM's displacement.  Returns
// true on failure (the combined displacement is not representable for the
// current code model / frame-index constraints), leaving AM unmodified;
// returns false on success with AM.Disp updated.
bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;

}

bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetLinux())
      // Address spaces 256/257 denote GS-/FS-relative accesses.
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      // Back up AM so a failed offset fold leaves it untouched.
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode.  In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID.  Note that this does *not* preserve the uniqueness of node
// IDs!  The selection DAG must no longer depend on their uniqueness when this
// is used.
static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  // A node ID of -1 means "not yet placed"; otherwise only move N if it
  // currently sits after Pos in the topological order.
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
// allows us to convert the shift and and into an h-register extract and
// a scaled index. Returns false if the simplification is performed.
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  // Only a single-use SRL by a constant can be rewritten this way.
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  // The residual left-shift must fit the SIB scale (1, 2, or 3 bits) and the
  // mask must be exactly 0xff shifted by that amount.
  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  // Build (X >> 8) & 0xff, then shift it back left; the SHL is what the
  // addressing-mode scale will absorb.
  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, Eight);
  InsertDAGNode(DAG, N, Srl);
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, And);
  InsertDAGNode(DAG, N, ShlCount);
  InsertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  // The AND becomes the index; the SHL is folded into the scale.
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  // (continued) The shift amount must fit the 1/2/4/8 SIB scale encoding.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  // Build (X & (Mask >> ShiftAmt)) << ShiftAmt; the outer SHL will be folded
  // into the addressing-mode scale below.
  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, NewAnd);
  InsertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  // (AMShiftAmt is unsigned, so "<= 0" is effectively "== 0".)
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a continuous run of bits.
  if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    InsertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  // Rebuild as (X >> (ShiftAmt + AMShiftAmt)) << AMShiftAmt; the trailing SHL
  // is absorbed by the addressing-mode scale.
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewSRLAmt);
  InsertDAGNode(DAG, N, NewSRL);
  InsertDAGNode(DAG, N, NewSHLAmt);
  InsertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

/// Recursive worker for MatchAddress: tries to absorb node N into AM,
/// returning true on failure (AM is left unchanged on failure paths via
/// explicit Backup/rollback).  Recursion is capped at depth 5, after which
/// the node is simply placed in a base/index register.
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    if (!AM.ES && AM.JT != -1) return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    // A bare constant folds straight into the displacement (if it fits).
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!FoldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    // A frame index can serve as the base when the base slot is free and the
    // accumulated displacement is representable.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          // The offset is pre-scaled by the shift amount.
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!FoldOffsetIntoAddress(Disp, AM))
            return false;
        }

        // Offset didn't fit (or ShVal wasn't add+constant): use the whole
        // shifted value as the index instead.
        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            // If the scaled offset doesn't fit, fall back to using the whole
            // multiply operand unscaled.
            if (FoldOffsetIntoAddress(Disp, AM))
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          // X*3 => base=X, index=X, scale=2 (etc.).
          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if a has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    InsertDAGNode(*CurDAG, N, Zero);
    InsertDAGNode(*CurDAG, N, Neg);
    return false;
  }

  case ISD::ADD: {
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;

    // Try again after commuting the operands.
    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base_Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      N = Handle.getValue();
      AM.Base_Reg = N.getOperand(0);
      AM.IndexReg = N.getOperand(1);
      AM.Scale = 1;
      return false;
    }
    // Re-read N through the handle in case recursion CSE'd it away.
    N = Handle.getValue();
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (CurDAG->isBaseWithConstantOffset(N)) {
      X86ISelAddressMode Backup = AM;
      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));

      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
        return false;
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  // Nothing matched: fall back to using N directly as base or index.
  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched.  It
/// is always a load, store, atomic node, or null.  It is only null when
/// checking memory operands for inline asm nodes.
///
/// NOTE: unlike the Match* helpers above, this returns true on SUCCESS
/// (TableGen ComplexPattern convention), hence "if (MatchAddress(...))
/// return false" below.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  if (MatchAddress(N, AM))
    return false;

  // Fill unused base/index slots with the "no register" sentinel so the
  // emitted operand list is always complete.
  MVT VT = N.getSimpleValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternChainNode: this is the matched node that has a chain input and
///   output.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  // Case 1: (scalar_to_vector (load x)) -- fold the load directly.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}


/// SelectMOV64Imm32 - Return true (match) when the 64-bit operand N can be
/// materialized with a 32-bit 'movl': either a constant whose value fits in
/// 32 bits zero-extended, or (in the small code model) a wrapped symbolic
/// address.
bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();
    // Reject values that don't round-trip through 32 bits.
    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
    return true;
  }

  // In static codegen with small code model, we can get the address of a label
  // into a register with 'movl'. TableGen has already made sure we're looking
  // at a label of some kind.
  assert(N->getOpcode() == X86ISD::Wrapper &&
         "Unexpected node type for MOV32ri64");
  N = N.getOperand(0);

  if (N->getOpcode() != ISD::TargetConstantPool &&
      N->getOpcode() != ISD::TargetJumpTable &&
      N->getOpcode() != ISD::TargetGlobalAddress &&
      N->getOpcode() != ISD::TargetExternalSymbol &&
      N->getOpcode() != ISD::TargetBlockAddress)
    return false;

  Imm = N;
  // Symbolic addresses only fit in 32 bits under the small code model.
  return TM.getCodeModel() == CodeModel::Small;
}

// NOTE(review): the remainder of this function continues past this chunk.
bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
    return false;

  SDLoc DL(N);
  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
  if (RN && RN->getReg() == 0)
    Base = CurDAG->getRegister(0, MVT::i64);
  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
    // Base could already be %rip, particularly in the x32 ABI.
1434261991Sdim Base = SDValue(CurDAG->getMachineNode( 1435261991Sdim TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, 1436261991Sdim CurDAG->getTargetConstant(0, MVT::i64), 1437261991Sdim Base, 1438261991Sdim CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), 1439261991Sdim 0); 1440261991Sdim } 1441261991Sdim 1442261991Sdim RN = dyn_cast<RegisterSDNode>(Index); 1443261991Sdim if (RN && RN->getReg() == 0) 1444261991Sdim Index = CurDAG->getRegister(0, MVT::i64); 1445261991Sdim else { 1446261991Sdim assert(Index.getValueType() == MVT::i32 && 1447261991Sdim "Expect to be extending 32-bit registers for use in LEA"); 1448261991Sdim Index = SDValue(CurDAG->getMachineNode( 1449261991Sdim TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, 1450261991Sdim CurDAG->getTargetConstant(0, MVT::i64), 1451261991Sdim Index, 1452261991Sdim CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), 1453261991Sdim 0); 1454261991Sdim } 1455261991Sdim 1456261991Sdim return true; 1457261991Sdim} 1458261991Sdim 1459193323Sed/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1460193323Sed/// mode it matches can be cost effectively emitted as an LEA instruction. 1461218893Sdimbool X86DAGToDAGISel::SelectLEAAddr(SDValue N, 1462193323Sed SDValue &Base, SDValue &Scale, 1463210299Sed SDValue &Index, SDValue &Disp, 1464210299Sed SDValue &Segment) { 1465193323Sed X86ISelAddressMode AM; 1466193323Sed 1467193323Sed // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support 1468193323Sed // segments. 
1469193323Sed SDValue Copy = AM.Segment; 1470193323Sed SDValue T = CurDAG->getRegister(0, MVT::i32); 1471193323Sed AM.Segment = T; 1472193323Sed if (MatchAddress(N, AM)) 1473193323Sed return false; 1474193323Sed assert (T == AM.Segment); 1475193323Sed AM.Segment = Copy; 1476193323Sed 1477261991Sdim MVT VT = N.getSimpleValueType(); 1478193323Sed unsigned Complexity = 0; 1479193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase) 1480207618Srdivacky if (AM.Base_Reg.getNode()) 1481193323Sed Complexity = 1; 1482193323Sed else 1483207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, VT); 1484193323Sed else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1485193323Sed Complexity = 4; 1486193323Sed 1487193323Sed if (AM.IndexReg.getNode()) 1488193323Sed Complexity++; 1489193323Sed else 1490193323Sed AM.IndexReg = CurDAG->getRegister(0, VT); 1491193323Sed 1492193323Sed // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1493193323Sed // a simple shift. 1494193323Sed if (AM.Scale > 1) 1495193323Sed Complexity++; 1496193323Sed 1497193323Sed // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1498193323Sed // to a LEA. This is determined with some expermentation but is by no means 1499193323Sed // optimal (especially for code size consideration). LEA is nice because of 1500193323Sed // its three-address nature. Tweak the cost function again when we can run 1501193323Sed // convertToThreeAddress() at register allocation time. 1502193323Sed if (AM.hasSymbolicDisplacement()) { 1503193323Sed // For X86-64, we should always use lea to materialize RIP relative 1504193323Sed // addresses. 1505193323Sed if (Subtarget->is64Bit()) 1506193323Sed Complexity = 4; 1507193323Sed else 1508193323Sed Complexity += 2; 1509193323Sed } 1510193323Sed 1511207618Srdivacky if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode())) 1512193323Sed Complexity++; 1513193323Sed 1514198090Srdivacky // If it isn't worth using an LEA, reject it. 
1515198090Srdivacky if (Complexity <= 2) 1516198090Srdivacky return false; 1517239462Sdim 1518198090Srdivacky getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1519198090Srdivacky return true; 1520193323Sed} 1521193323Sed 1522194612Sed/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 1523218893Sdimbool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, 1524194612Sed SDValue &Scale, SDValue &Index, 1525210299Sed SDValue &Disp, SDValue &Segment) { 1526194612Sed assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1527194612Sed const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1528239462Sdim 1529194612Sed X86ISelAddressMode AM; 1530194612Sed AM.GV = GA->getGlobal(); 1531194612Sed AM.Disp += GA->getOffset(); 1532207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); 1533195098Sed AM.SymbolFlags = GA->getTargetFlags(); 1534195098Sed 1535194612Sed if (N.getValueType() == MVT::i32) { 1536194612Sed AM.Scale = 1; 1537194612Sed AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1538194612Sed } else { 1539194612Sed AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1540194612Sed } 1541239462Sdim 1542194612Sed getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1543194612Sed return true; 1544194612Sed} 1545194612Sed 1546194612Sed 1547202375Srdivackybool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, 1548193323Sed SDValue &Base, SDValue &Scale, 1549193323Sed SDValue &Index, SDValue &Disp, 1550193323Sed SDValue &Segment) { 1551204642Srdivacky if (!ISD::isNON_EXTLoad(N.getNode()) || 1552204642Srdivacky !IsProfitableToFold(N, P, P) || 1553207618Srdivacky !IsLegalToFold(N, P, P, OptLevel)) 1554204642Srdivacky return false; 1555239462Sdim 1556218893Sdim return SelectAddr(N.getNode(), 1557218893Sdim N.getOperand(1), Base, Scale, Index, Disp, Segment); 1558193323Sed} 1559193323Sed 1560193323Sed/// getGlobalBaseReg - Return an SDNode that returns the value of 1561193323Sed/// the global base register. 
Output instructions required to 1562193323Sed/// initialize the global base register, if necessary. 1563193323Sed/// 1564193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1565193399Sed unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 1566261991Sdim return CurDAG->getRegister(GlobalBaseReg, 1567261991Sdim getTargetLowering()->getPointerTy()).getNode(); 1568193323Sed} 1569193323Sed 1570193323SedSDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { 1571193323Sed SDValue Chain = Node->getOperand(0); 1572193323Sed SDValue In1 = Node->getOperand(1); 1573193323Sed SDValue In2L = Node->getOperand(2); 1574193323Sed SDValue In2H = Node->getOperand(3); 1575243830Sdim 1576193323Sed SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1577218893Sdim if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1578276479Sdim return nullptr; 1579198090Srdivacky MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1580198090Srdivacky MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1581198090Srdivacky const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; 1582261991Sdim SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), 1583251662Sdim MVT::i32, MVT::i32, MVT::Other, Ops); 1584198090Srdivacky cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); 1585198090Srdivacky return ResNode; 1586193323Sed} 1587193323Sed 1588243830Sdim/// Atomic opcode table 1589243830Sdim/// 1590223017Sdimenum AtomicOpc { 1591243830Sdim ADD, 1592243830Sdim SUB, 1593243830Sdim INC, 1594243830Sdim DEC, 1595223017Sdim OR, 1596223017Sdim AND, 1597223017Sdim XOR, 1598223017Sdim AtomicOpcEnd 1599223017Sdim}; 1600223017Sdim 1601223017Sdimenum AtomicSz { 1602223017Sdim ConstantI8, 1603223017Sdim I8, 1604223017Sdim SextConstantI16, 1605223017Sdim ConstantI16, 1606223017Sdim I16, 1607223017Sdim SextConstantI32, 1608223017Sdim ConstantI32, 1609223017Sdim I32, 1610223017Sdim SextConstantI64, 1611223017Sdim ConstantI64, 1612223017Sdim I64, 1613223017Sdim 
AtomicSzEnd 1614223017Sdim}; 1615223017Sdim 1616234353Sdimstatic const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { 1617223017Sdim { 1618243830Sdim X86::LOCK_ADD8mi, 1619243830Sdim X86::LOCK_ADD8mr, 1620243830Sdim X86::LOCK_ADD16mi8, 1621243830Sdim X86::LOCK_ADD16mi, 1622243830Sdim X86::LOCK_ADD16mr, 1623243830Sdim X86::LOCK_ADD32mi8, 1624243830Sdim X86::LOCK_ADD32mi, 1625243830Sdim X86::LOCK_ADD32mr, 1626243830Sdim X86::LOCK_ADD64mi8, 1627243830Sdim X86::LOCK_ADD64mi32, 1628243830Sdim X86::LOCK_ADD64mr, 1629243830Sdim }, 1630243830Sdim { 1631243830Sdim X86::LOCK_SUB8mi, 1632243830Sdim X86::LOCK_SUB8mr, 1633243830Sdim X86::LOCK_SUB16mi8, 1634243830Sdim X86::LOCK_SUB16mi, 1635243830Sdim X86::LOCK_SUB16mr, 1636243830Sdim X86::LOCK_SUB32mi8, 1637243830Sdim X86::LOCK_SUB32mi, 1638243830Sdim X86::LOCK_SUB32mr, 1639243830Sdim X86::LOCK_SUB64mi8, 1640243830Sdim X86::LOCK_SUB64mi32, 1641243830Sdim X86::LOCK_SUB64mr, 1642243830Sdim }, 1643243830Sdim { 1644243830Sdim 0, 1645243830Sdim X86::LOCK_INC8m, 1646243830Sdim 0, 1647243830Sdim 0, 1648243830Sdim X86::LOCK_INC16m, 1649243830Sdim 0, 1650243830Sdim 0, 1651243830Sdim X86::LOCK_INC32m, 1652243830Sdim 0, 1653243830Sdim 0, 1654243830Sdim X86::LOCK_INC64m, 1655243830Sdim }, 1656243830Sdim { 1657243830Sdim 0, 1658243830Sdim X86::LOCK_DEC8m, 1659243830Sdim 0, 1660243830Sdim 0, 1661243830Sdim X86::LOCK_DEC16m, 1662243830Sdim 0, 1663243830Sdim 0, 1664243830Sdim X86::LOCK_DEC32m, 1665243830Sdim 0, 1666243830Sdim 0, 1667243830Sdim X86::LOCK_DEC64m, 1668243830Sdim }, 1669243830Sdim { 1670223017Sdim X86::LOCK_OR8mi, 1671223017Sdim X86::LOCK_OR8mr, 1672223017Sdim X86::LOCK_OR16mi8, 1673223017Sdim X86::LOCK_OR16mi, 1674223017Sdim X86::LOCK_OR16mr, 1675223017Sdim X86::LOCK_OR32mi8, 1676223017Sdim X86::LOCK_OR32mi, 1677223017Sdim X86::LOCK_OR32mr, 1678223017Sdim X86::LOCK_OR64mi8, 1679223017Sdim X86::LOCK_OR64mi32, 1680243830Sdim X86::LOCK_OR64mr, 1681223017Sdim }, 1682223017Sdim { 1683223017Sdim X86::LOCK_AND8mi, 1684223017Sdim 
X86::LOCK_AND8mr, 1685223017Sdim X86::LOCK_AND16mi8, 1686223017Sdim X86::LOCK_AND16mi, 1687223017Sdim X86::LOCK_AND16mr, 1688223017Sdim X86::LOCK_AND32mi8, 1689223017Sdim X86::LOCK_AND32mi, 1690223017Sdim X86::LOCK_AND32mr, 1691223017Sdim X86::LOCK_AND64mi8, 1692223017Sdim X86::LOCK_AND64mi32, 1693243830Sdim X86::LOCK_AND64mr, 1694223017Sdim }, 1695223017Sdim { 1696223017Sdim X86::LOCK_XOR8mi, 1697223017Sdim X86::LOCK_XOR8mr, 1698223017Sdim X86::LOCK_XOR16mi8, 1699223017Sdim X86::LOCK_XOR16mi, 1700223017Sdim X86::LOCK_XOR16mr, 1701223017Sdim X86::LOCK_XOR32mi8, 1702223017Sdim X86::LOCK_XOR32mi, 1703223017Sdim X86::LOCK_XOR32mr, 1704223017Sdim X86::LOCK_XOR64mi8, 1705223017Sdim X86::LOCK_XOR64mi32, 1706243830Sdim X86::LOCK_XOR64mr, 1707223017Sdim } 1708223017Sdim}; 1709223017Sdim 1710243830Sdim// Return the target constant operand for atomic-load-op and do simple 1711243830Sdim// translations, such as from atomic-load-add to lock-sub. The return value is 1712243830Sdim// one of the following 3 cases: 1713243830Sdim// + target-constant, the operand could be supported as a target constant. 1714243830Sdim// + empty, the operand is not needed any more with the new op selected. 1715243830Sdim// + non-empty, otherwise. 1716243830Sdimstatic SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, 1717261991Sdim SDLoc dl, 1718261991Sdim enum AtomicOpc &Op, MVT NVT, 1719243830Sdim SDValue Val) { 1720243830Sdim if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) { 1721243830Sdim int64_t CNVal = CN->getSExtValue(); 1722243830Sdim // Quit if not 32-bit imm. 1723243830Sdim if ((int32_t)CNVal != CNVal) 1724243830Sdim return Val; 1725243830Sdim // For atomic-load-add, we could do some optimizations. 1726243830Sdim if (Op == ADD) { 1727243830Sdim // Translate to INC/DEC if ADD by 1 or -1. 1728243830Sdim if ((CNVal == 1) || (CNVal == -1)) { 1729243830Sdim Op = (CNVal == 1) ? INC : DEC; 1730243830Sdim // No more constant operand after being translated into INC/DEC. 
1731243830Sdim return SDValue(); 1732243830Sdim } 1733243830Sdim // Translate to SUB if ADD by negative value. 1734243830Sdim if (CNVal < 0) { 1735243830Sdim Op = SUB; 1736243830Sdim CNVal = -CNVal; 1737243830Sdim } 1738243830Sdim } 1739243830Sdim return CurDAG->getTargetConstant(CNVal, NVT); 1740243830Sdim } 1741243830Sdim 1742243830Sdim // If the value operand is single-used, try to optimize it. 1743243830Sdim if (Op == ADD && Val.hasOneUse()) { 1744243830Sdim // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). 1745243830Sdim if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { 1746243830Sdim Op = SUB; 1747243830Sdim return Val.getOperand(1); 1748243830Sdim } 1749243830Sdim // A special case for i16, which needs truncating as, in most cases, it's 1750243830Sdim // promoted to i32. We will translate 1751243830Sdim // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) 1752243830Sdim if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && 1753243830Sdim Val.getOperand(0).getOpcode() == ISD::SUB && 1754243830Sdim X86::isZeroNode(Val.getOperand(0).getOperand(0))) { 1755243830Sdim Op = SUB; 1756243830Sdim Val = Val.getOperand(0); 1757243830Sdim return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, 1758243830Sdim Val.getOperand(1)); 1759243830Sdim } 1760243830Sdim } 1761243830Sdim 1762243830Sdim return Val; 1763243830Sdim} 1764243830Sdim 1765261991SdimSDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { 1766223017Sdim if (Node->hasAnyUseOfValue(0)) 1767276479Sdim return nullptr; 1768239462Sdim 1769261991Sdim SDLoc dl(Node); 1770243830Sdim 1771223017Sdim // Optimize common patterns for __sync_or_and_fetch and similar arith 1772223017Sdim // operations where the result is not used. This allows us to use the "lock" 1773223017Sdim // version of the arithmetic instruction. 
1774223017Sdim SDValue Chain = Node->getOperand(0); 1775223017Sdim SDValue Ptr = Node->getOperand(1); 1776223017Sdim SDValue Val = Node->getOperand(2); 1777223017Sdim SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1778223017Sdim if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1779276479Sdim return nullptr; 1780223017Sdim 1781223017Sdim // Which index into the table. 1782223017Sdim enum AtomicOpc Op; 1783223017Sdim switch (Node->getOpcode()) { 1784243830Sdim default: 1785276479Sdim return nullptr; 1786223017Sdim case ISD::ATOMIC_LOAD_OR: 1787223017Sdim Op = OR; 1788223017Sdim break; 1789223017Sdim case ISD::ATOMIC_LOAD_AND: 1790223017Sdim Op = AND; 1791223017Sdim break; 1792223017Sdim case ISD::ATOMIC_LOAD_XOR: 1793223017Sdim Op = XOR; 1794223017Sdim break; 1795243830Sdim case ISD::ATOMIC_LOAD_ADD: 1796243830Sdim Op = ADD; 1797243830Sdim break; 1798223017Sdim } 1799251662Sdim 1800243830Sdim Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); 1801243830Sdim bool isUnOp = !Val.getNode(); 1802243830Sdim bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); 1803239462Sdim 1804223017Sdim unsigned Opc = 0; 1805261991Sdim switch (NVT.SimpleTy) { 1806276479Sdim default: return nullptr; 1807223017Sdim case MVT::i8: 1808223017Sdim if (isCN) 1809223017Sdim Opc = AtomicOpcTbl[Op][ConstantI8]; 1810223017Sdim else 1811223017Sdim Opc = AtomicOpcTbl[Op][I8]; 1812223017Sdim break; 1813223017Sdim case MVT::i16: 1814223017Sdim if (isCN) { 1815223017Sdim if (immSext8(Val.getNode())) 1816223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI16]; 1817223017Sdim else 1818223017Sdim Opc = AtomicOpcTbl[Op][ConstantI16]; 1819223017Sdim } else 1820223017Sdim Opc = AtomicOpcTbl[Op][I16]; 1821223017Sdim break; 1822223017Sdim case MVT::i32: 1823223017Sdim if (isCN) { 1824223017Sdim if (immSext8(Val.getNode())) 1825223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI32]; 1826223017Sdim else 1827223017Sdim Opc = AtomicOpcTbl[Op][ConstantI32]; 1828223017Sdim } else 
1829223017Sdim Opc = AtomicOpcTbl[Op][I32]; 1830223017Sdim break; 1831223017Sdim case MVT::i64: 1832224145Sdim Opc = AtomicOpcTbl[Op][I64]; 1833223017Sdim if (isCN) { 1834223017Sdim if (immSext8(Val.getNode())) 1835223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI64]; 1836223017Sdim else if (i64immSExt32(Val.getNode())) 1837223017Sdim Opc = AtomicOpcTbl[Op][ConstantI64]; 1838224145Sdim } 1839223017Sdim break; 1840223017Sdim } 1841239462Sdim 1842224145Sdim assert(Opc != 0 && "Invalid arith lock transform!"); 1843224145Sdim 1844243830Sdim SDValue Ret; 1845223017Sdim SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 1846223017Sdim dl, NVT), 0); 1847223017Sdim MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1848223017Sdim MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1849243830Sdim if (isUnOp) { 1850243830Sdim SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; 1851251662Sdim Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 1852243830Sdim } else { 1853243830Sdim SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; 1854251662Sdim Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 1855243830Sdim } 1856223017Sdim cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1857223017Sdim SDValue RetVals[] = { Undef, Ret }; 1858276479Sdim return CurDAG->getMergeValues(RetVals, dl).getNode(); 1859223017Sdim} 1860223017Sdim 1861198090Srdivacky/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has 1862198090Srdivacky/// any uses which require the SF or OF bits to be accurate. 1863198090Srdivackystatic bool HasNoSignedComparisonUses(SDNode *N) { 1864198090Srdivacky // Examine each user of the node. 1865198090Srdivacky for (SDNode::use_iterator UI = N->use_begin(), 1866198090Srdivacky UE = N->use_end(); UI != UE; ++UI) { 1867198090Srdivacky // Only examine CopyToReg uses. 
1868198090Srdivacky if (UI->getOpcode() != ISD::CopyToReg) 1869198090Srdivacky return false; 1870198090Srdivacky // Only examine CopyToReg uses that copy to EFLAGS. 1871198090Srdivacky if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != 1872198090Srdivacky X86::EFLAGS) 1873198090Srdivacky return false; 1874198090Srdivacky // Examine each user of the CopyToReg use. 1875198090Srdivacky for (SDNode::use_iterator FlagUI = UI->use_begin(), 1876198090Srdivacky FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { 1877198090Srdivacky // Only examine the Flag result. 1878198090Srdivacky if (FlagUI.getUse().getResNo() != 1) continue; 1879198090Srdivacky // Anything unusual: assume conservatively. 1880198090Srdivacky if (!FlagUI->isMachineOpcode()) return false; 1881198090Srdivacky // Examine the opcode of the user. 1882198090Srdivacky switch (FlagUI->getMachineOpcode()) { 1883198090Srdivacky // These comparisons don't treat the most significant bit specially. 1884198090Srdivacky case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: 1885198090Srdivacky case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: 1886198090Srdivacky case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: 1887198090Srdivacky case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: 1888203954Srdivacky case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: 1889203954Srdivacky case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: 1890198090Srdivacky case X86::CMOVA16rr: case X86::CMOVA16rm: 1891198090Srdivacky case X86::CMOVA32rr: case X86::CMOVA32rm: 1892198090Srdivacky case X86::CMOVA64rr: case X86::CMOVA64rm: 1893198090Srdivacky case X86::CMOVAE16rr: case X86::CMOVAE16rm: 1894198090Srdivacky case X86::CMOVAE32rr: case X86::CMOVAE32rm: 1895198090Srdivacky case X86::CMOVAE64rr: case X86::CMOVAE64rm: 1896198090Srdivacky case X86::CMOVB16rr: case X86::CMOVB16rm: 1897198090Srdivacky case X86::CMOVB32rr: case 
X86::CMOVB32rm: 1898198090Srdivacky case X86::CMOVB64rr: case X86::CMOVB64rm: 1899198090Srdivacky case X86::CMOVBE16rr: case X86::CMOVBE16rm: 1900198090Srdivacky case X86::CMOVBE32rr: case X86::CMOVBE32rm: 1901198090Srdivacky case X86::CMOVBE64rr: case X86::CMOVBE64rm: 1902198090Srdivacky case X86::CMOVE16rr: case X86::CMOVE16rm: 1903198090Srdivacky case X86::CMOVE32rr: case X86::CMOVE32rm: 1904198090Srdivacky case X86::CMOVE64rr: case X86::CMOVE64rm: 1905198090Srdivacky case X86::CMOVNE16rr: case X86::CMOVNE16rm: 1906198090Srdivacky case X86::CMOVNE32rr: case X86::CMOVNE32rm: 1907198090Srdivacky case X86::CMOVNE64rr: case X86::CMOVNE64rm: 1908198090Srdivacky case X86::CMOVNP16rr: case X86::CMOVNP16rm: 1909198090Srdivacky case X86::CMOVNP32rr: case X86::CMOVNP32rm: 1910198090Srdivacky case X86::CMOVNP64rr: case X86::CMOVNP64rm: 1911198090Srdivacky case X86::CMOVP16rr: case X86::CMOVP16rm: 1912198090Srdivacky case X86::CMOVP32rr: case X86::CMOVP32rm: 1913198090Srdivacky case X86::CMOVP64rr: case X86::CMOVP64rm: 1914198090Srdivacky continue; 1915198090Srdivacky // Anything else: assume conservatively. 1916198090Srdivacky default: return false; 1917198090Srdivacky } 1918198090Srdivacky } 1919198090Srdivacky } 1920198090Srdivacky return true; 1921198090Srdivacky} 1922198090Srdivacky 1923234353Sdim/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode 1924234353Sdim/// is suitable for doing the {load; increment or decrement; store} to modify 1925234353Sdim/// transformation. 1926239462Sdimstatic bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, 1927234353Sdim SDValue StoredVal, SelectionDAG *CurDAG, 1928234353Sdim LoadSDNode* &LoadNode, SDValue &InputChain) { 1929234353Sdim 1930234353Sdim // is the value stored the result of a DEC or INC? 1931234353Sdim if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; 1932234353Sdim 1933234353Sdim // is the stored value result 0 of the load? 
1934234353Sdim if (StoredVal.getResNo() != 0) return false; 1935234353Sdim 1936234353Sdim // are there other uses of the loaded value than the inc or dec? 1937234353Sdim if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; 1938234353Sdim 1939234353Sdim // is the store non-extending and non-indexed? 1940234353Sdim if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) 1941234353Sdim return false; 1942234353Sdim 1943234353Sdim SDValue Load = StoredVal->getOperand(0); 1944234353Sdim // Is the stored value a non-extending and non-indexed load? 1945234353Sdim if (!ISD::isNormalLoad(Load.getNode())) return false; 1946234353Sdim 1947234353Sdim // Return LoadNode by reference. 1948234353Sdim LoadNode = cast<LoadSDNode>(Load); 1949234353Sdim // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) 1950239462Sdim EVT LdVT = LoadNode->getMemoryVT(); 1951239462Sdim if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && 1952234353Sdim LdVT != MVT::i8) 1953234353Sdim return false; 1954234353Sdim 1955234353Sdim // Is store the only read of the loaded value? 1956234353Sdim if (!Load.hasOneUse()) 1957234353Sdim return false; 1958239462Sdim 1959234353Sdim // Is the address of the store the same as the load? 1960234353Sdim if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || 1961234353Sdim LoadNode->getOffset() != StoreNode->getOffset()) 1962234353Sdim return false; 1963234353Sdim 1964234353Sdim // Check if the chain is produced by the load or is a TokenFactor with 1965234353Sdim // the load output chain as an operand. Return InputChain by reference. 
1966234353Sdim SDValue Chain = StoreNode->getChain(); 1967234353Sdim 1968234353Sdim bool ChainCheck = false; 1969234353Sdim if (Chain == Load.getValue(1)) { 1970234353Sdim ChainCheck = true; 1971234353Sdim InputChain = LoadNode->getChain(); 1972234353Sdim } else if (Chain.getOpcode() == ISD::TokenFactor) { 1973234353Sdim SmallVector<SDValue, 4> ChainOps; 1974234353Sdim for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { 1975234353Sdim SDValue Op = Chain.getOperand(i); 1976234353Sdim if (Op == Load.getValue(1)) { 1977234353Sdim ChainCheck = true; 1978234353Sdim continue; 1979234353Sdim } 1980239462Sdim 1981239462Sdim // Make sure using Op as part of the chain would not cause a cycle here. 1982239462Sdim // In theory, we could check whether the chain node is a predecessor of 1983239462Sdim // the load. But that can be very expensive. Instead visit the uses and 1984239462Sdim // make sure they all have smaller node id than the load. 1985239462Sdim int LoadId = LoadNode->getNodeId(); 1986239462Sdim for (SDNode::use_iterator UI = Op.getNode()->use_begin(), 1987239462Sdim UE = UI->use_end(); UI != UE; ++UI) { 1988239462Sdim if (UI.getUse().getResNo() != 0) 1989239462Sdim continue; 1990239462Sdim if (UI->getNodeId() > LoadId) 1991239462Sdim return false; 1992239462Sdim } 1993239462Sdim 1994234353Sdim ChainOps.push_back(Op); 1995234353Sdim } 1996234353Sdim 1997234353Sdim if (ChainCheck) 1998234353Sdim // Make a new TokenFactor with all the other input chains except 1999234353Sdim // for the load. 2000261991Sdim InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), 2001276479Sdim MVT::Other, ChainOps); 2002234353Sdim } 2003234353Sdim if (!ChainCheck) 2004234353Sdim return false; 2005234353Sdim 2006234353Sdim return true; 2007234353Sdim} 2008234353Sdim 2009234353Sdim/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory 2010234353Sdim/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. 
2011234353Sdimstatic unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { 2012234353Sdim if (Opc == X86ISD::DEC) { 2013234353Sdim if (LdVT == MVT::i64) return X86::DEC64m; 2014234353Sdim if (LdVT == MVT::i32) return X86::DEC32m; 2015234353Sdim if (LdVT == MVT::i16) return X86::DEC16m; 2016234353Sdim if (LdVT == MVT::i8) return X86::DEC8m; 2017234353Sdim } else { 2018234353Sdim assert(Opc == X86ISD::INC && "unrecognized opcode"); 2019234353Sdim if (LdVT == MVT::i64) return X86::INC64m; 2020234353Sdim if (LdVT == MVT::i32) return X86::INC32m; 2021234353Sdim if (LdVT == MVT::i16) return X86::INC16m; 2022234353Sdim if (LdVT == MVT::i8) return X86::INC8m; 2023234353Sdim } 2024234353Sdim llvm_unreachable("unrecognized size for LdVT"); 2025234353Sdim} 2026234353Sdim 2027239462Sdim/// SelectGather - Customized ISel for GATHER operations. 2028239462Sdim/// 2029239462SdimSDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { 2030239462Sdim // Operands of Gather: VSrc, Base, VIdx, VMask, Scale 2031239462Sdim SDValue Chain = Node->getOperand(0); 2032239462Sdim SDValue VSrc = Node->getOperand(2); 2033239462Sdim SDValue Base = Node->getOperand(3); 2034239462Sdim SDValue VIdx = Node->getOperand(4); 2035239462Sdim SDValue VMask = Node->getOperand(5); 2036239462Sdim ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); 2037239462Sdim if (!Scale) 2038276479Sdim return nullptr; 2039239462Sdim 2040239462Sdim SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), 2041239462Sdim MVT::Other); 2042239462Sdim 2043239462Sdim // Memory Operands: Base, Scale, Index, Disp, Segment 2044239462Sdim SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); 2045239462Sdim SDValue Segment = CurDAG->getRegister(0, MVT::i32); 2046239462Sdim const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, 2047239462Sdim Disp, Segment, VMask, Chain}; 2048261991Sdim SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops); 
2049239462Sdim // Node has 2 outputs: VDst and MVT::Other. 2050239462Sdim // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. 2051239462Sdim // We replace VDst of Node with VDst of ResNode, and Other of Node with Other 2052239462Sdim // of ResNode. 2053239462Sdim ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); 2054239462Sdim ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2)); 2055239462Sdim return ResNode; 2056239462Sdim} 2057239462Sdim 2058202375SrdivackySDNode *X86DAGToDAGISel::Select(SDNode *Node) { 2059261991Sdim MVT NVT = Node->getSimpleValueType(0); 2060193323Sed unsigned Opc, MOpc; 2061193323Sed unsigned Opcode = Node->getOpcode(); 2062261991Sdim SDLoc dl(Node); 2063239462Sdim 2064204642Srdivacky DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); 2065193323Sed 2066193323Sed if (Node->isMachineOpcode()) { 2067204642Srdivacky DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); 2068255804Sdim Node->setNodeId(-1); 2069276479Sdim return nullptr; // Already selected. 
2070193323Sed } 2071193323Sed 2072193323Sed switch (Opcode) { 2073198090Srdivacky default: break; 2074239462Sdim case ISD::INTRINSIC_W_CHAIN: { 2075239462Sdim unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2076239462Sdim switch (IntNo) { 2077239462Sdim default: break; 2078239462Sdim case Intrinsic::x86_avx2_gather_d_pd: 2079239462Sdim case Intrinsic::x86_avx2_gather_d_pd_256: 2080239462Sdim case Intrinsic::x86_avx2_gather_q_pd: 2081239462Sdim case Intrinsic::x86_avx2_gather_q_pd_256: 2082239462Sdim case Intrinsic::x86_avx2_gather_d_ps: 2083239462Sdim case Intrinsic::x86_avx2_gather_d_ps_256: 2084239462Sdim case Intrinsic::x86_avx2_gather_q_ps: 2085239462Sdim case Intrinsic::x86_avx2_gather_q_ps_256: 2086239462Sdim case Intrinsic::x86_avx2_gather_d_q: 2087239462Sdim case Intrinsic::x86_avx2_gather_d_q_256: 2088239462Sdim case Intrinsic::x86_avx2_gather_q_q: 2089239462Sdim case Intrinsic::x86_avx2_gather_q_q_256: 2090239462Sdim case Intrinsic::x86_avx2_gather_d_d: 2091239462Sdim case Intrinsic::x86_avx2_gather_d_d_256: 2092239462Sdim case Intrinsic::x86_avx2_gather_q_d: 2093239462Sdim case Intrinsic::x86_avx2_gather_q_d_256: { 2094261991Sdim if (!Subtarget->hasAVX2()) 2095261991Sdim break; 2096239462Sdim unsigned Opc; 2097239462Sdim switch (IntNo) { 2098239462Sdim default: llvm_unreachable("Impossible intrinsic"); 2099239462Sdim case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; 2100239462Sdim case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break; 2101239462Sdim case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; 2102239462Sdim case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; 2103239462Sdim case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; 2104239462Sdim case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; 2105239462Sdim case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; 2106239462Sdim case 
Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; 2107239462Sdim case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break; 2108239462Sdim case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; 2109239462Sdim case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; 2110239462Sdim case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; 2111239462Sdim case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; 2112239462Sdim case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; 2113239462Sdim case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; 2114239462Sdim case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; 2115239462Sdim } 2116239462Sdim SDNode *RetVal = SelectGather(Node, Opc); 2117239462Sdim if (RetVal) 2118239462Sdim // We already called ReplaceUses inside SelectGather. 2119276479Sdim return nullptr; 2120239462Sdim break; 2121239462Sdim } 2122239462Sdim } 2123239462Sdim break; 2124239462Sdim } 2125198090Srdivacky case X86ISD::GlobalBaseReg: 2126198090Srdivacky return getGlobalBaseReg(); 2127193323Sed 2128239462Sdim 2129223017Sdim case ISD::ATOMIC_LOAD_XOR: 2130223017Sdim case ISD::ATOMIC_LOAD_AND: 2131243830Sdim case ISD::ATOMIC_LOAD_OR: 2132243830Sdim case ISD::ATOMIC_LOAD_ADD: { 2133223017Sdim SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); 2134223017Sdim if (RetVal) 2135223017Sdim return RetVal; 2136223017Sdim break; 2137223017Sdim } 2138221345Sdim case ISD::AND: 2139221345Sdim case ISD::OR: 2140221345Sdim case ISD::XOR: { 2141221345Sdim // For operations of the form (x << C1) op C2, check if we can use a smaller 2142221345Sdim // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. 
2143221345Sdim SDValue N0 = Node->getOperand(0); 2144221345Sdim SDValue N1 = Node->getOperand(1); 2145221345Sdim 2146221345Sdim if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse()) 2147221345Sdim break; 2148221345Sdim 2149221345Sdim // i8 is unshrinkable, i16 should be promoted to i32. 2150221345Sdim if (NVT != MVT::i32 && NVT != MVT::i64) 2151221345Sdim break; 2152221345Sdim 2153221345Sdim ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); 2154221345Sdim ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2155221345Sdim if (!Cst || !ShlCst) 2156221345Sdim break; 2157221345Sdim 2158221345Sdim int64_t Val = Cst->getSExtValue(); 2159221345Sdim uint64_t ShlVal = ShlCst->getZExtValue(); 2160221345Sdim 2161221345Sdim // Make sure that we don't change the operation by removing bits. 2162221345Sdim // This only matters for OR and XOR, AND is unaffected. 2163243830Sdim uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1; 2164243830Sdim if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) 2165221345Sdim break; 2166221345Sdim 2167239462Sdim unsigned ShlOp, Op; 2168261991Sdim MVT CstVT = NVT; 2169221345Sdim 2170221345Sdim // Check the minimum bitwidth for the new constant. 2171221345Sdim // TODO: AND32ri is the same as AND64ri32 with zext imm. 2172221345Sdim // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr 2173221345Sdim // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. 2174221345Sdim if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal)) 2175221345Sdim CstVT = MVT::i8; 2176221345Sdim else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal)) 2177221345Sdim CstVT = MVT::i32; 2178221345Sdim 2179221345Sdim // Bail if there is no smaller encoding. 
2180221345Sdim if (NVT == CstVT) 2181221345Sdim break; 2182221345Sdim 2183261991Sdim switch (NVT.SimpleTy) { 2184221345Sdim default: llvm_unreachable("Unsupported VT!"); 2185221345Sdim case MVT::i32: 2186221345Sdim assert(CstVT == MVT::i8); 2187221345Sdim ShlOp = X86::SHL32ri; 2188221345Sdim 2189221345Sdim switch (Opcode) { 2190239462Sdim default: llvm_unreachable("Impossible opcode"); 2191221345Sdim case ISD::AND: Op = X86::AND32ri8; break; 2192221345Sdim case ISD::OR: Op = X86::OR32ri8; break; 2193221345Sdim case ISD::XOR: Op = X86::XOR32ri8; break; 2194221345Sdim } 2195221345Sdim break; 2196221345Sdim case MVT::i64: 2197221345Sdim assert(CstVT == MVT::i8 || CstVT == MVT::i32); 2198221345Sdim ShlOp = X86::SHL64ri; 2199221345Sdim 2200221345Sdim switch (Opcode) { 2201239462Sdim default: llvm_unreachable("Impossible opcode"); 2202221345Sdim case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; 2203221345Sdim case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; 2204221345Sdim case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break; 2205221345Sdim } 2206221345Sdim break; 2207221345Sdim } 2208221345Sdim 2209221345Sdim // Emit the smaller op and the shift. 
2210221345Sdim SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); 2211221345Sdim SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); 2212221345Sdim return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), 2213221345Sdim getI8Imm(ShlVal)); 2214221345Sdim } 2215218893Sdim case X86ISD::UMUL: { 2216218893Sdim SDValue N0 = Node->getOperand(0); 2217218893Sdim SDValue N1 = Node->getOperand(1); 2218239462Sdim 2219218893Sdim unsigned LoReg; 2220261991Sdim switch (NVT.SimpleTy) { 2221218893Sdim default: llvm_unreachable("Unsupported VT!"); 2222218893Sdim case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; 2223218893Sdim case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; 2224218893Sdim case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; 2225218893Sdim case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; 2226218893Sdim } 2227239462Sdim 2228218893Sdim SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 2229218893Sdim N0, SDValue()).getValue(1); 2230239462Sdim 2231218893Sdim SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); 2232218893Sdim SDValue Ops[] = {N1, InFlag}; 2233251662Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2234239462Sdim 2235218893Sdim ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); 2236218893Sdim ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); 2237218893Sdim ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); 2238276479Sdim return nullptr; 2239218893Sdim } 2240239462Sdim 2241198090Srdivacky case ISD::SMUL_LOHI: 2242198090Srdivacky case ISD::UMUL_LOHI: { 2243198090Srdivacky SDValue N0 = Node->getOperand(0); 2244198090Srdivacky SDValue N1 = Node->getOperand(1); 2245193323Sed 2246198090Srdivacky bool isSigned = Opcode == ISD::SMUL_LOHI; 2247243830Sdim bool hasBMI2 = Subtarget->hasBMI2(); 2248198090Srdivacky if (!isSigned) { 2249261991Sdim switch (NVT.SimpleTy) { 2250198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2251198090Srdivacky case 
MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 2252198090Srdivacky case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 2253243830Sdim case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; 2254243830Sdim MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; 2255243830Sdim case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; 2256243830Sdim MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; 2257193323Sed } 2258198090Srdivacky } else { 2259261991Sdim switch (NVT.SimpleTy) { 2260198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2261198090Srdivacky case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 2262198090Srdivacky case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 2263198090Srdivacky case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 2264198090Srdivacky case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 2265193323Sed } 2266198090Srdivacky } 2267193323Sed 2268243830Sdim unsigned SrcReg, LoReg, HiReg; 2269243830Sdim switch (Opc) { 2270243830Sdim default: llvm_unreachable("Unknown MUL opcode!"); 2271243830Sdim case X86::IMUL8r: 2272243830Sdim case X86::MUL8r: 2273243830Sdim SrcReg = LoReg = X86::AL; HiReg = X86::AH; 2274243830Sdim break; 2275243830Sdim case X86::IMUL16r: 2276243830Sdim case X86::MUL16r: 2277243830Sdim SrcReg = LoReg = X86::AX; HiReg = X86::DX; 2278243830Sdim break; 2279243830Sdim case X86::IMUL32r: 2280243830Sdim case X86::MUL32r: 2281243830Sdim SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; 2282243830Sdim break; 2283243830Sdim case X86::IMUL64r: 2284243830Sdim case X86::MUL64r: 2285243830Sdim SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; 2286243830Sdim break; 2287243830Sdim case X86::MULX32rr: 2288243830Sdim SrcReg = X86::EDX; LoReg = HiReg = 0; 2289243830Sdim break; 2290243830Sdim case X86::MULX64rr: 2291243830Sdim SrcReg = X86::RDX; LoReg = HiReg = 0; 2292243830Sdim break; 2293198090Srdivacky } 2294193323Sed 2295198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 
2296202375Srdivacky bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 2297198090Srdivacky // Multiply is commmutative. 2298198090Srdivacky if (!foldedLoad) { 2299202375Srdivacky foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 2300198090Srdivacky if (foldedLoad) 2301198090Srdivacky std::swap(N0, N1); 2302198090Srdivacky } 2303193323Sed 2304243830Sdim SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, 2305239462Sdim N0, SDValue()).getValue(1); 2306243830Sdim SDValue ResHi, ResLo; 2307198090Srdivacky 2308198090Srdivacky if (foldedLoad) { 2309243830Sdim SDValue Chain; 2310198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 2311198090Srdivacky InFlag }; 2312243830Sdim if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { 2313243830Sdim SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); 2314251662Sdim SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); 2315243830Sdim ResHi = SDValue(CNode, 0); 2316243830Sdim ResLo = SDValue(CNode, 1); 2317243830Sdim Chain = SDValue(CNode, 2); 2318243830Sdim InFlag = SDValue(CNode, 3); 2319243830Sdim } else { 2320243830Sdim SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 2321251662Sdim SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); 2322243830Sdim Chain = SDValue(CNode, 0); 2323243830Sdim InFlag = SDValue(CNode, 1); 2324243830Sdim } 2325218893Sdim 2326198090Srdivacky // Update the chain. 
2327243830Sdim ReplaceUses(N1.getValue(1), Chain); 2328198090Srdivacky } else { 2329243830Sdim SDValue Ops[] = { N1, InFlag }; 2330243830Sdim if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { 2331243830Sdim SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); 2332251662Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2333243830Sdim ResHi = SDValue(CNode, 0); 2334243830Sdim ResLo = SDValue(CNode, 1); 2335243830Sdim InFlag = SDValue(CNode, 2); 2336243830Sdim } else { 2337243830Sdim SDVTList VTs = CurDAG->getVTList(MVT::Glue); 2338251662Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2339243830Sdim InFlag = SDValue(CNode, 0); 2340243830Sdim } 2341198090Srdivacky } 2342198090Srdivacky 2343210299Sed // Prevent use of AH in a REX instruction by referencing AX instead. 2344210299Sed if (HiReg == X86::AH && Subtarget->is64Bit() && 2345210299Sed !SDValue(Node, 1).use_empty()) { 2346210299Sed SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2347210299Sed X86::AX, MVT::i16, InFlag); 2348210299Sed InFlag = Result.getValue(2); 2349210299Sed // Get the low part if needed. Don't use getCopyFromReg for aliasing 2350210299Sed // registers. 2351210299Sed if (!SDValue(Node, 0).use_empty()) 2352210299Sed ReplaceUses(SDValue(Node, 1), 2353210299Sed CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 2354210299Sed 2355210299Sed // Shift AX down 8 bits. 2356210299Sed Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 2357210299Sed Result, 2358210299Sed CurDAG->getTargetConstant(8, MVT::i8)), 0); 2359210299Sed // Then truncate it down to i8. 2360210299Sed ReplaceUses(SDValue(Node, 1), 2361210299Sed CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 2362210299Sed } 2363198090Srdivacky // Copy the low half of the result, if it is needed. 
2364202375Srdivacky if (!SDValue(Node, 0).use_empty()) { 2365276479Sdim if (!ResLo.getNode()) { 2366243830Sdim assert(LoReg && "Register for low half is not defined!"); 2367243830Sdim ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, 2368243830Sdim InFlag); 2369243830Sdim InFlag = ResLo.getValue(2); 2370243830Sdim } 2371243830Sdim ReplaceUses(SDValue(Node, 0), ResLo); 2372243830Sdim DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); 2373198090Srdivacky } 2374198090Srdivacky // Copy the high half of the result, if it is needed. 2375202375Srdivacky if (!SDValue(Node, 1).use_empty()) { 2376276479Sdim if (!ResHi.getNode()) { 2377243830Sdim assert(HiReg && "Register for high half is not defined!"); 2378243830Sdim ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, 2379243830Sdim InFlag); 2380243830Sdim InFlag = ResHi.getValue(2); 2381243830Sdim } 2382243830Sdim ReplaceUses(SDValue(Node, 1), ResHi); 2383243830Sdim DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); 2384198090Srdivacky } 2385239462Sdim 2386276479Sdim return nullptr; 2387198090Srdivacky } 2388193323Sed 2389198090Srdivacky case ISD::SDIVREM: 2390198090Srdivacky case ISD::UDIVREM: { 2391198090Srdivacky SDValue N0 = Node->getOperand(0); 2392198090Srdivacky SDValue N1 = Node->getOperand(1); 2393193323Sed 2394198090Srdivacky bool isSigned = Opcode == ISD::SDIVREM; 2395198090Srdivacky if (!isSigned) { 2396261991Sdim switch (NVT.SimpleTy) { 2397198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2398198090Srdivacky case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 2399198090Srdivacky case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 2400198090Srdivacky case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 2401198090Srdivacky case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; 2402193323Sed } 2403198090Srdivacky } else { 2404261991Sdim switch (NVT.SimpleTy) { 2405198090Srdivacky default: 
llvm_unreachable("Unsupported VT!"); 2406198090Srdivacky case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 2407198090Srdivacky case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 2408198090Srdivacky case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 2409198090Srdivacky case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 2410198090Srdivacky } 2411198090Srdivacky } 2412193323Sed 2413201360Srdivacky unsigned LoReg, HiReg, ClrReg; 2414261991Sdim unsigned SExtOpcode; 2415261991Sdim switch (NVT.SimpleTy) { 2416198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2417198090Srdivacky case MVT::i8: 2418201360Srdivacky LoReg = X86::AL; ClrReg = HiReg = X86::AH; 2419198090Srdivacky SExtOpcode = X86::CBW; 2420198090Srdivacky break; 2421198090Srdivacky case MVT::i16: 2422198090Srdivacky LoReg = X86::AX; HiReg = X86::DX; 2423261991Sdim ClrReg = X86::DX; 2424198090Srdivacky SExtOpcode = X86::CWD; 2425198090Srdivacky break; 2426198090Srdivacky case MVT::i32: 2427201360Srdivacky LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; 2428198090Srdivacky SExtOpcode = X86::CDQ; 2429198090Srdivacky break; 2430198090Srdivacky case MVT::i64: 2431201360Srdivacky LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; 2432198090Srdivacky SExtOpcode = X86::CQO; 2433198090Srdivacky break; 2434198090Srdivacky } 2435193323Sed 2436198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 2437202375Srdivacky bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 2438198090Srdivacky bool signBitIsZero = CurDAG->SignBitIsZero(N0); 2439198090Srdivacky 2440198090Srdivacky SDValue InFlag; 2441198090Srdivacky if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { 2442198090Srdivacky // Special case for div8, just use a move with zero extension to AX to 2443198090Srdivacky // clear the upper 8 bits (AH). 
2444198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; 2445202375Srdivacky if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { 2446198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; 2447198090Srdivacky Move = 2448223017Sdim SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, 2449251662Sdim MVT::Other, Ops), 0); 2450198090Srdivacky Chain = Move.getValue(1); 2451198090Srdivacky ReplaceUses(N0.getValue(1), Chain); 2452193323Sed } else { 2453198090Srdivacky Move = 2454223017Sdim SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0); 2455198090Srdivacky Chain = CurDAG->getEntryNode(); 2456198090Srdivacky } 2457223017Sdim Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue()); 2458198090Srdivacky InFlag = Chain.getValue(1); 2459198090Srdivacky } else { 2460198090Srdivacky InFlag = 2461198090Srdivacky CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, 2462198090Srdivacky LoReg, N0, SDValue()).getValue(1); 2463198090Srdivacky if (isSigned && !signBitIsZero) { 2464198090Srdivacky // Sign extend the low part into the high part. 2465193323Sed InFlag = 2466218893Sdim SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); 2467198090Srdivacky } else { 2468198090Srdivacky // Zero out the high part, effectively zero extending the input. 
2469261991Sdim SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); 2470261991Sdim switch (NVT.SimpleTy) { 2471261991Sdim case MVT::i16: 2472261991Sdim ClrNode = 2473261991Sdim SDValue(CurDAG->getMachineNode( 2474261991Sdim TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, 2475261991Sdim CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)), 2476261991Sdim 0); 2477261991Sdim break; 2478261991Sdim case MVT::i32: 2479261991Sdim break; 2480261991Sdim case MVT::i64: 2481261991Sdim ClrNode = 2482261991Sdim SDValue(CurDAG->getMachineNode( 2483261991Sdim TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, 2484261991Sdim CurDAG->getTargetConstant(0, MVT::i64), ClrNode, 2485261991Sdim CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), 2486261991Sdim 0); 2487261991Sdim break; 2488261991Sdim default: 2489261991Sdim llvm_unreachable("Unexpected division source"); 2490261991Sdim } 2491261991Sdim 2492201360Srdivacky InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, 2493198090Srdivacky ClrNode, InFlag).getValue(1); 2494193323Sed } 2495198090Srdivacky } 2496193323Sed 2497198090Srdivacky if (foldedLoad) { 2498198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 2499198090Srdivacky InFlag }; 2500198090Srdivacky SDNode *CNode = 2501251662Sdim CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); 2502198090Srdivacky InFlag = SDValue(CNode, 1); 2503198090Srdivacky // Update the chain. 2504198090Srdivacky ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 2505198090Srdivacky } else { 2506198090Srdivacky InFlag = 2507218893Sdim SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); 2508198090Srdivacky } 2509198090Srdivacky 2510210299Sed // Prevent use of AH in a REX instruction by referencing AX instead. 2511210299Sed // Shift it down 8 bits. 
2512261991Sdim // 2513261991Sdim // The current assumption of the register allocator is that isel 2514261991Sdim // won't generate explicit references to the GPR8_NOREX registers. If 2515261991Sdim // the allocator and/or the backend get enhanced to be more robust in 2516261991Sdim // that regard, this can be, and should be, removed. 2517210299Sed if (HiReg == X86::AH && Subtarget->is64Bit() && 2518210299Sed !SDValue(Node, 1).use_empty()) { 2519210299Sed SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2520210299Sed X86::AX, MVT::i16, InFlag); 2521210299Sed InFlag = Result.getValue(2); 2522210299Sed 2523210299Sed // If we also need AL (the quotient), get it by extracting a subreg from 2524210299Sed // Result. The fast register allocator does not like multiple CopyFromReg 2525210299Sed // nodes using aliasing registers. 2526210299Sed if (!SDValue(Node, 0).use_empty()) 2527210299Sed ReplaceUses(SDValue(Node, 0), 2528210299Sed CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 2529210299Sed 2530210299Sed // Shift AX right by 8 bits instead of using AH. 2531210299Sed Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 2532210299Sed Result, 2533210299Sed CurDAG->getTargetConstant(8, MVT::i8)), 2534210299Sed 0); 2535210299Sed ReplaceUses(SDValue(Node, 1), 2536210299Sed CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 2537210299Sed } 2538198090Srdivacky // Copy the division (low) result, if it is needed. 2539202375Srdivacky if (!SDValue(Node, 0).use_empty()) { 2540198090Srdivacky SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2541198090Srdivacky LoReg, NVT, InFlag); 2542198090Srdivacky InFlag = Result.getValue(2); 2543202375Srdivacky ReplaceUses(SDValue(Node, 0), Result); 2544204642Srdivacky DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 2545198090Srdivacky } 2546198090Srdivacky // Copy the remainder (high) result, if it is needed. 
2547202375Srdivacky if (!SDValue(Node, 1).use_empty()) { 2548210299Sed SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2549210299Sed HiReg, NVT, InFlag); 2550210299Sed InFlag = Result.getValue(2); 2551202375Srdivacky ReplaceUses(SDValue(Node, 1), Result); 2552204642Srdivacky DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 2553198090Srdivacky } 2554276479Sdim return nullptr; 2555198090Srdivacky } 2556193323Sed 2557239462Sdim case X86ISD::CMP: 2558239462Sdim case X86ISD::SUB: { 2559239462Sdim // Sometimes a SUB is used to perform comparison. 2560239462Sdim if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) 2561239462Sdim // This node is not a CMP. 2562239462Sdim break; 2563198090Srdivacky SDValue N0 = Node->getOperand(0); 2564198090Srdivacky SDValue N1 = Node->getOperand(1); 2565198090Srdivacky 2566198090Srdivacky // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to 2567198090Srdivacky // use a smaller encoding. 2568212904Sdim if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && 2569212904Sdim HasNoSignedComparisonUses(Node)) 2570207618Srdivacky // Look past the truncate if CMP is the only use of it. 
2571207618Srdivacky N0 = N0.getOperand(0); 2572234353Sdim if ((N0.getNode()->getOpcode() == ISD::AND || 2573234353Sdim (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && 2574234353Sdim N0.getNode()->hasOneUse() && 2575198090Srdivacky N0.getValueType() != MVT::i8 && 2576198090Srdivacky X86::isZeroNode(N1)) { 2577198090Srdivacky ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1)); 2578198090Srdivacky if (!C) break; 2579198090Srdivacky 2580198090Srdivacky // For example, convert "testl %eax, $8" to "testb %al, $8" 2581198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && 2582198090Srdivacky (!(C->getZExtValue() & 0x80) || 2583198090Srdivacky HasNoSignedComparisonUses(Node))) { 2584198090Srdivacky SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); 2585198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2586198090Srdivacky 2587198090Srdivacky // On x86-32, only the ABCD registers have 8-bit subregisters. 2588198090Srdivacky if (!Subtarget->is64Bit()) { 2589234353Sdim const TargetRegisterClass *TRC; 2590261991Sdim switch (N0.getSimpleValueType().SimpleTy) { 2591198090Srdivacky case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2592198090Srdivacky case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2593198090Srdivacky default: llvm_unreachable("Unsupported TEST operand type!"); 2594198090Srdivacky } 2595198090Srdivacky SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 2596198090Srdivacky Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2597198090Srdivacky Reg.getValueType(), Reg, RC), 0); 2598198090Srdivacky } 2599198090Srdivacky 2600198090Srdivacky // Extract the l-register. 2601208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, 2602198090Srdivacky MVT::i8, Reg); 2603198090Srdivacky 2604198090Srdivacky // Emit a testb. 
2605243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, 2606243830Sdim Subreg, Imm); 2607243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2608243830Sdim // one, do not call ReplaceAllUsesWith. 2609243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2610243830Sdim SDValue(NewNode, 0)); 2611276479Sdim return nullptr; 2612193323Sed } 2613198090Srdivacky 2614198090Srdivacky // For example, "testl %eax, $2048" to "testb %ah, $8". 2615198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && 2616198090Srdivacky (!(C->getZExtValue() & 0x8000) || 2617198090Srdivacky HasNoSignedComparisonUses(Node))) { 2618198090Srdivacky // Shift the immediate right by 8 bits. 2619198090Srdivacky SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, 2620198090Srdivacky MVT::i8); 2621198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2622198090Srdivacky 2623198090Srdivacky // Put the value in an ABCD register. 2624234353Sdim const TargetRegisterClass *TRC; 2625261991Sdim switch (N0.getSimpleValueType().SimpleTy) { 2626198090Srdivacky case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; 2627198090Srdivacky case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2628198090Srdivacky case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2629198090Srdivacky default: llvm_unreachable("Unsupported TEST operand type!"); 2630198090Srdivacky } 2631198090Srdivacky SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 2632198090Srdivacky Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2633198090Srdivacky Reg.getValueType(), Reg, RC), 0); 2634198090Srdivacky 2635198090Srdivacky // Extract the h-register. 2636208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, 2637198090Srdivacky MVT::i8, Reg); 2638198090Srdivacky 2639226633Sdim // Emit a testb. 
The EXTRACT_SUBREG becomes a COPY that can only 2640226633Sdim // target GR8_NOREX registers, so make sure the register class is 2641226633Sdim // forced. 2642243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, 2643243830Sdim MVT::i32, Subreg, ShiftedImm); 2644243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2645243830Sdim // one, do not call ReplaceAllUsesWith. 2646243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2647243830Sdim SDValue(NewNode, 0)); 2648276479Sdim return nullptr; 2649193323Sed } 2650198090Srdivacky 2651198090Srdivacky // For example, "testl %eax, $32776" to "testw %ax, $32776". 2652198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && 2653198090Srdivacky N0.getValueType() != MVT::i16 && 2654198090Srdivacky (!(C->getZExtValue() & 0x8000) || 2655198090Srdivacky HasNoSignedComparisonUses(Node))) { 2656198090Srdivacky SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); 2657198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2658198090Srdivacky 2659198090Srdivacky // Extract the 16-bit subregister. 2660208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, 2661198090Srdivacky MVT::i16, Reg); 2662198090Srdivacky 2663198090Srdivacky // Emit a testw. 2664243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, 2665243830Sdim Subreg, Imm); 2666243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2667243830Sdim // one, do not call ReplaceAllUsesWith. 2668243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2669243830Sdim SDValue(NewNode, 0)); 2670276479Sdim return nullptr; 2671193323Sed } 2672198090Srdivacky 2673198090Srdivacky // For example, "testq %rax, $268468232" to "testl %eax, $268468232". 
2674198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && 2675198090Srdivacky N0.getValueType() == MVT::i64 && 2676198090Srdivacky (!(C->getZExtValue() & 0x80000000) || 2677198090Srdivacky HasNoSignedComparisonUses(Node))) { 2678198090Srdivacky SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); 2679198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2680198090Srdivacky 2681198090Srdivacky // Extract the 32-bit subregister. 2682208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, 2683198090Srdivacky MVT::i32, Reg); 2684198090Srdivacky 2685198090Srdivacky // Emit a testl. 2686243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, 2687243830Sdim Subreg, Imm); 2688243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2689243830Sdim // one, do not call ReplaceAllUsesWith. 2690243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2691243830Sdim SDValue(NewNode, 0)); 2692276479Sdim return nullptr; 2693198090Srdivacky } 2694193323Sed } 2695198090Srdivacky break; 2696193323Sed } 2697234353Sdim case ISD::STORE: { 2698234353Sdim // Change a chain of {load; incr or dec; store} of the same value into 2699234353Sdim // a simple increment or decrement through memory of that value, if the 2700234353Sdim // uses of the modified value and its address are suitable. 2701234353Sdim // The DEC64m tablegen pattern is currently not able to match the case where 2702239462Sdim // the EFLAGS on the original DEC are used. (This also applies to 2703234353Sdim // {INC,DEC}X{64,32,16,8}.) 2704234353Sdim // We'll need to improve tablegen to allow flags to be transferred from a 2705234353Sdim // node in the pattern to the result node. 
probably with a new keyword 2706234353Sdim // for example, we have this 2707234353Sdim // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", 2708234353Sdim // [(store (add (loadi64 addr:$dst), -1), addr:$dst), 2709234353Sdim // (implicit EFLAGS)]>; 2710234353Sdim // but maybe need something like this 2711234353Sdim // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", 2712234353Sdim // [(store (add (loadi64 addr:$dst), -1), addr:$dst), 2713234353Sdim // (transferrable EFLAGS)]>; 2714234353Sdim 2715234353Sdim StoreSDNode *StoreNode = cast<StoreSDNode>(Node); 2716234353Sdim SDValue StoredVal = StoreNode->getOperand(1); 2717234353Sdim unsigned Opc = StoredVal->getOpcode(); 2718234353Sdim 2719276479Sdim LoadSDNode *LoadNode = nullptr; 2720234353Sdim SDValue InputChain; 2721234353Sdim if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, 2722234353Sdim LoadNode, InputChain)) 2723234353Sdim break; 2724234353Sdim 2725234353Sdim SDValue Base, Scale, Index, Disp, Segment; 2726234353Sdim if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), 2727234353Sdim Base, Scale, Index, Disp, Segment)) 2728234353Sdim break; 2729234353Sdim 2730234353Sdim MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2); 2731234353Sdim MemOp[0] = StoreNode->getMemOperand(); 2732234353Sdim MemOp[1] = LoadNode->getMemOperand(); 2733234353Sdim const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; 2734239462Sdim EVT LdVT = LoadNode->getMemoryVT(); 2735234353Sdim unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); 2736234353Sdim MachineSDNode *Result = CurDAG->getMachineNode(newOpc, 2737261991Sdim SDLoc(Node), 2738251662Sdim MVT::i32, MVT::Other, Ops); 2739234353Sdim Result->setMemRefs(MemOp, MemOp + 2); 2740234353Sdim 2741234353Sdim ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); 2742234353Sdim ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); 2743234353Sdim 2744234353Sdim return Result; 2745198090Srdivacky } 
2746234353Sdim } 2747193323Sed 2748202375Srdivacky SDNode *ResNode = SelectCode(Node); 2749193323Sed 2750204642Srdivacky DEBUG(dbgs() << "=> "; 2751276479Sdim if (ResNode == nullptr || ResNode == Node) 2752204642Srdivacky Node->dump(CurDAG); 2753204642Srdivacky else 2754204642Srdivacky ResNode->dump(CurDAG); 2755204642Srdivacky dbgs() << '\n'); 2756193323Sed 2757193323Sed return ResNode; 2758193323Sed} 2759193323Sed 2760193323Sedbool X86DAGToDAGISel:: 2761193323SedSelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, 2762193323Sed std::vector<SDValue> &OutOps) { 2763193323Sed SDValue Op0, Op1, Op2, Op3, Op4; 2764193323Sed switch (ConstraintCode) { 2765193323Sed case 'o': // offsetable ?? 2766193323Sed case 'v': // not offsetable ?? 2767193323Sed default: return true; 2768193323Sed case 'm': // memory 2769276479Sdim if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) 2770193323Sed return true; 2771193323Sed break; 2772193323Sed } 2773239462Sdim 2774193323Sed OutOps.push_back(Op0); 2775193323Sed OutOps.push_back(Op1); 2776193323Sed OutOps.push_back(Op2); 2777193323Sed OutOps.push_back(Op3); 2778193323Sed OutOps.push_back(Op4); 2779193323Sed return false; 2780193323Sed} 2781193323Sed 2782239462Sdim/// createX86ISelDag - This pass converts a legalized DAG into a 2783193323Sed/// X86-specific DAG, ready for instruction scheduling. 2784193323Sed/// 2785193323SedFunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, 2786234353Sdim CodeGenOpt::Level OptLevel) { 2787193323Sed return new X86DAGToDAGISel(TM, OptLevel); 2788193323Sed} 2789