1193323Sed//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file defines a DAG pattern matching instruction selector for X86, 11193323Sed// converting from a legalized dag to a X86 dag. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#include "X86.h" 16193323Sed#include "X86InstrBuilder.h" 17193323Sed#include "X86MachineFunctionInfo.h" 18193323Sed#include "X86RegisterInfo.h" 19193323Sed#include "X86Subtarget.h" 20193323Sed#include "X86TargetMachine.h" 21249423Sdim#include "llvm/ADT/Statistic.h" 22249423Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 23193323Sed#include "llvm/CodeGen/MachineFunction.h" 24193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h" 25193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 26193323Sed#include "llvm/CodeGen/SelectionDAGISel.h" 27280031Sdim#include "llvm/IR/Function.h" 28249423Sdim#include "llvm/IR/Instructions.h" 29249423Sdim#include "llvm/IR/Intrinsics.h" 30249423Sdim#include "llvm/IR/Type.h" 31193323Sed#include "llvm/Support/Debug.h" 32198090Srdivacky#include "llvm/Support/ErrorHandling.h" 33193323Sed#include "llvm/Support/MathExtras.h" 34198090Srdivacky#include "llvm/Support/raw_ostream.h" 35249423Sdim#include "llvm/Target/TargetMachine.h" 36249423Sdim#include "llvm/Target/TargetOptions.h" 37280031Sdim#include <stdint.h> 38193323Sedusing namespace llvm; 39193323Sed 40276479Sdim#define DEBUG_TYPE "x86-isel" 41276479Sdim 42193323SedSTATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 43193323Sed 
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
  /// numbers for the leaves of the matched tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;       // Valid when BaseType == RegBase.
    int Base_FrameIndex;    // Valid when BaseType == FrameIndexBase.

    unsigned Scale;         // Index scale factor.
    SDValue IndexReg;
    int32_t Disp;           // Integer displacement.
    SDValue Segment;        // Segment register override, if any.
    // At most one of the following symbolic displacements may be set.
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    /// Return true if a symbolic displacement (global, constant pool,
    /// external symbol, MCSymbol, jump table, or block address) has already
    /// been folded into this address; only one may be present.
    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    /// Return true if a base (register or frame index) or an index register
    /// has already been selected.
    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Debugging aid: print every component of the addressing mode.
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
//===--------------------------------------------------------------------===// 148296417Sdim /// ISel - X86-specific code to select X86 machine instructions for 149193323Sed /// SelectionDAG operations. 150193323Sed /// 151276479Sdim class X86DAGToDAGISel final : public SelectionDAGISel { 152296417Sdim /// Keep a pointer to the X86Subtarget around so that we can 153193323Sed /// make the right decision when generating code for different targets. 154193323Sed const X86Subtarget *Subtarget; 155193323Sed 156296417Sdim /// If true, selector should try to optimize for code size instead of 157296417Sdim /// performance. 158193323Sed bool OptForSize; 159193323Sed 160193323Sed public: 161193323Sed explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) 162288943Sdim : SelectionDAGISel(tm, OptLevel), OptForSize(false) {} 163193323Sed 164276479Sdim const char *getPassName() const override { 165193323Sed return "X86 DAG->DAG Instruction Selection"; 166193323Sed } 167193323Sed 168276479Sdim bool runOnMachineFunction(MachineFunction &MF) override { 169276479Sdim // Reset the subtarget each time through. 170288943Sdim Subtarget = &MF.getSubtarget<X86Subtarget>(); 171276479Sdim SelectionDAGISel::runOnMachineFunction(MF); 172276479Sdim return true; 173276479Sdim } 174193323Sed 175276479Sdim void EmitFunctionEntryCode() override; 176193323Sed 177276479Sdim bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; 178203954Srdivacky 179276479Sdim void PreprocessISelDAG() override; 180276479Sdim 181212904Sdim inline bool immSext8(SDNode *N) const { 182212904Sdim return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue()); 183212904Sdim } 184212904Sdim 185296417Sdim // True if the 64-bit immediate fits in a 32-bit sign-extended field. 
186212904Sdim inline bool i64immSExt32(SDNode *N) const { 187212904Sdim uint64_t v = cast<ConstantSDNode>(N)->getZExtValue(); 188212904Sdim return (int64_t)v == (int32_t)v; 189212904Sdim } 190212904Sdim 191193323Sed// Include the pieces autogenerated from the target description. 192193323Sed#include "X86GenDAGISel.inc" 193193323Sed 194193323Sed private: 195276479Sdim SDNode *Select(SDNode *N) override; 196296417Sdim SDNode *selectGather(SDNode *N, unsigned Opc); 197296417Sdim SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT); 198193323Sed 199296417Sdim bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); 200296417Sdim bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); 201296417Sdim bool matchWrapper(SDValue N, X86ISelAddressMode &AM); 202296417Sdim bool matchAddress(SDValue N, X86ISelAddressMode &AM); 203296417Sdim bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth); 204296417Sdim bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 205198090Srdivacky unsigned Depth); 206296417Sdim bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); 207296417Sdim bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, 208193323Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 209193323Sed SDValue &Segment); 210296417Sdim bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, 211288943Sdim SDValue &Scale, SDValue &Index, SDValue &Disp, 212288943Sdim SDValue &Segment); 213296417Sdim bool selectMOV64Imm32(SDValue N, SDValue &Imm); 214296417Sdim bool selectLEAAddr(SDValue N, SDValue &Base, 215210299Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 216210299Sed SDValue &Segment); 217296417Sdim bool selectLEA64_32Addr(SDValue N, SDValue &Base, 218261991Sdim SDValue &Scale, SDValue &Index, SDValue &Disp, 219261991Sdim SDValue &Segment); 220296417Sdim bool selectTLSADDRAddr(SDValue N, SDValue &Base, 221210299Sed SDValue &Scale, SDValue &Index, SDValue &Disp, 222210299Sed SDValue &Segment); 223296417Sdim bool 
selectScalarSSELoad(SDNode *Root, SDValue N, 224204642Srdivacky SDValue &Base, SDValue &Scale, 225193323Sed SDValue &Index, SDValue &Disp, 226193323Sed SDValue &Segment, 227204642Srdivacky SDValue &NodeWithChain); 228239462Sdim 229296417Sdim bool tryFoldLoad(SDNode *P, SDValue N, 230193323Sed SDValue &Base, SDValue &Scale, 231193323Sed SDValue &Index, SDValue &Disp, 232193323Sed SDValue &Segment); 233239462Sdim 234296417Sdim /// Implement addressing mode selection for inline asm expressions. 235276479Sdim bool SelectInlineAsmMemoryOperand(const SDValue &Op, 236288943Sdim unsigned ConstraintID, 237276479Sdim std::vector<SDValue> &OutOps) override; 238239462Sdim 239296417Sdim void emitSpecialCodeForMain(); 240193323Sed 241288943Sdim inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL, 242288943Sdim SDValue &Base, SDValue &Scale, 243288943Sdim SDValue &Index, SDValue &Disp, 244288943Sdim SDValue &Segment) { 245280031Sdim Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 246288943Sdim ? CurDAG->getTargetFrameIndex( 247288943Sdim AM.Base_FrameIndex, 248288943Sdim TLI->getPointerTy(CurDAG->getDataLayout())) 249280031Sdim : AM.Base_Reg; 250288943Sdim Scale = getI8Imm(AM.Scale, DL); 251193323Sed Index = AM.IndexReg; 252296417Sdim // These are 32-bit even in 64-bit mode since RIP-relative offset 253193323Sed // is 32-bit. 
254193323Sed if (AM.GV) 255261991Sdim Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), 256210299Sed MVT::i32, AM.Disp, 257195098Sed AM.SymbolFlags); 258193323Sed else if (AM.CP) 259193323Sed Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 260195098Sed AM.Align, AM.Disp, AM.SymbolFlags); 261243830Sdim else if (AM.ES) { 262243830Sdim assert(!AM.Disp && "Non-zero displacement is ignored with ES."); 263195098Sed Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); 264288943Sdim } else if (AM.MCSym) { 265288943Sdim assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); 266288943Sdim assert(AM.SymbolFlags == 0 && "oo"); 267288943Sdim Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); 268243830Sdim } else if (AM.JT != -1) { 269243830Sdim assert(!AM.Disp && "Non-zero displacement is ignored with JT."); 270195098Sed Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); 271243830Sdim } else if (AM.BlockAddr) 272243830Sdim Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, 273243830Sdim AM.SymbolFlags); 274193323Sed else 275288943Sdim Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); 276193323Sed 277193323Sed if (AM.Segment.getNode()) 278193323Sed Segment = AM.Segment; 279193323Sed else 280193323Sed Segment = CurDAG->getRegister(0, MVT::i32); 281193323Sed } 282193323Sed 283296417Sdim // Utility function to determine whether we should avoid selecting 284296417Sdim // immediate forms of instructions for better code size or not. 285296417Sdim // At a high level, we'd like to avoid such instructions when 286296417Sdim // we have similar constants used within the same basic block 287296417Sdim // that can be kept in a register. 288296417Sdim // 289296417Sdim bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { 290296417Sdim uint32_t UseCount = 0; 291296417Sdim 292296417Sdim // Do not want to hoist if we're not optimizing for size. 293296417Sdim // TODO: We'd like to remove this restriction. 
294296417Sdim // See the comment in X86InstrInfo.td for more info. 295296417Sdim if (!OptForSize) 296296417Sdim return false; 297296417Sdim 298296417Sdim // Walk all the users of the immediate. 299296417Sdim for (SDNode::use_iterator UI = N->use_begin(), 300296417Sdim UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { 301296417Sdim 302296417Sdim SDNode *User = *UI; 303296417Sdim 304296417Sdim // This user is already selected. Count it as a legitimate use and 305296417Sdim // move on. 306296417Sdim if (User->isMachineOpcode()) { 307296417Sdim UseCount++; 308296417Sdim continue; 309296417Sdim } 310296417Sdim 311296417Sdim // We want to count stores of immediates as real uses. 312296417Sdim if (User->getOpcode() == ISD::STORE && 313296417Sdim User->getOperand(1).getNode() == N) { 314296417Sdim UseCount++; 315296417Sdim continue; 316296417Sdim } 317296417Sdim 318296417Sdim // We don't currently match users that have > 2 operands (except 319296417Sdim // for stores, which are handled above) 320296417Sdim // Those instruction won't match in ISEL, for now, and would 321296417Sdim // be counted incorrectly. 322296417Sdim // This may change in the future as we add additional instruction 323296417Sdim // types. 324296417Sdim if (User->getNumOperands() != 2) 325296417Sdim continue; 326296417Sdim 327296417Sdim // Immediates that are used for offsets as part of stack 328296417Sdim // manipulation should be left alone. These are typically 329296417Sdim // used to indicate SP offsets for argument passing and 330296417Sdim // will get pulled into stores/pushes (implicitly). 331296417Sdim if (User->getOpcode() == X86ISD::ADD || 332296417Sdim User->getOpcode() == ISD::ADD || 333296417Sdim User->getOpcode() == X86ISD::SUB || 334296417Sdim User->getOpcode() == ISD::SUB) { 335296417Sdim 336296417Sdim // Find the other operand of the add/sub. 
337296417Sdim SDValue OtherOp = User->getOperand(0); 338296417Sdim if (OtherOp.getNode() == N) 339296417Sdim OtherOp = User->getOperand(1); 340296417Sdim 341296417Sdim // Don't count if the other operand is SP. 342296417Sdim RegisterSDNode *RegNode; 343296417Sdim if (OtherOp->getOpcode() == ISD::CopyFromReg && 344296417Sdim (RegNode = dyn_cast_or_null<RegisterSDNode>( 345296417Sdim OtherOp->getOperand(1).getNode()))) 346296417Sdim if ((RegNode->getReg() == X86::ESP) || 347296417Sdim (RegNode->getReg() == X86::RSP)) 348296417Sdim continue; 349296417Sdim } 350296417Sdim 351296417Sdim // ... otherwise, count this and move on. 352296417Sdim UseCount++; 353296417Sdim } 354296417Sdim 355296417Sdim // If we have more than 1 use, then recommend for hoisting. 356296417Sdim return (UseCount > 1); 357296417Sdim } 358296417Sdim 359296417Sdim /// Return a target constant with the specified value of type i8. 360288943Sdim inline SDValue getI8Imm(unsigned Imm, SDLoc DL) { 361288943Sdim return CurDAG->getTargetConstant(Imm, DL, MVT::i8); 362193323Sed } 363193323Sed 364296417Sdim /// Return a target constant with the specified value, of type i32. 365288943Sdim inline SDValue getI32Imm(unsigned Imm, SDLoc DL) { 366288943Sdim return CurDAG->getTargetConstant(Imm, DL, MVT::i32); 367193323Sed } 368193323Sed 369296417Sdim /// Return an SDNode that returns the value of the global base register. 370296417Sdim /// Output instructions required to initialize the global base register, 371296417Sdim /// if necessary. 372193323Sed SDNode *getGlobalBaseReg(); 373193323Sed 374296417Sdim /// Return a reference to the TargetMachine, casted to the target-specific 375296417Sdim /// type. 376249423Sdim const X86TargetMachine &getTargetMachine() const { 377193399Sed return static_cast<const X86TargetMachine &>(TM); 378193399Sed } 379193399Sed 380296417Sdim /// Return a reference to the TargetInstrInfo, casted to the target-specific 381296417Sdim /// type. 
382249423Sdim const X86InstrInfo *getInstrInfo() const { 383288943Sdim return Subtarget->getInstrInfo(); 384193399Sed } 385280031Sdim 386280031Sdim /// \brief Address-mode matching performs shift-of-and to and-of-shift 387280031Sdim /// reassociation in order to expose more scaled addressing 388280031Sdim /// opportunities. 389280031Sdim bool ComplexPatternFuncMutatesDAG() const override { 390280031Sdim return true; 391280031Sdim } 392193323Sed }; 393193323Sed} 394193323Sed 395193323Sed 396203954Srdivackybool 397203954SrdivackyX86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { 398193323Sed if (OptLevel == CodeGenOpt::None) return false; 399193323Sed 400203954Srdivacky if (!N.hasOneUse()) 401203954Srdivacky return false; 402203954Srdivacky 403203954Srdivacky if (N.getOpcode() != ISD::LOAD) 404203954Srdivacky return true; 405203954Srdivacky 406203954Srdivacky // If N is a load, do additional profitability checks. 407203954Srdivacky if (U == Root) { 408193323Sed switch (U->getOpcode()) { 409193323Sed default: break; 410202375Srdivacky case X86ISD::ADD: 411202375Srdivacky case X86ISD::SUB: 412202375Srdivacky case X86ISD::AND: 413202375Srdivacky case X86ISD::XOR: 414202375Srdivacky case X86ISD::OR: 415193323Sed case ISD::ADD: 416193323Sed case ISD::ADDC: 417193323Sed case ISD::ADDE: 418193323Sed case ISD::AND: 419193323Sed case ISD::OR: 420193323Sed case ISD::XOR: { 421193323Sed SDValue Op1 = U->getOperand(1); 422193323Sed 423193323Sed // If the other operand is a 8-bit immediate we should fold the immediate 424193323Sed // instead. This reduces code size. 425193323Sed // e.g. 426193323Sed // movl 4(%esp), %eax 427193323Sed // addl $4, %eax 428193323Sed // vs. 429193323Sed // movl $4, %eax 430193323Sed // addl 4(%esp), %eax 431193323Sed // The former is 2 bytes shorter. In case where the increment is 1, then 432193323Sed // the saving can be 4 bytes (by using incl %eax). 
433193323Sed if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) 434193323Sed if (Imm->getAPIntValue().isSignedIntN(8)) 435193323Sed return false; 436193323Sed 437193323Sed // If the other operand is a TLS address, we should fold it instead. 438193323Sed // This produces 439193323Sed // movl %gs:0, %eax 440193323Sed // leal i@NTPOFF(%eax), %eax 441193323Sed // instead of 442193323Sed // movl $i@NTPOFF, %eax 443193323Sed // addl %gs:0, %eax 444193323Sed // if the block also has an access to a second TLS address this will save 445193323Sed // a load. 446276479Sdim // FIXME: This is probably also true for non-TLS addresses. 447193323Sed if (Op1.getOpcode() == X86ISD::Wrapper) { 448193323Sed SDValue Val = Op1.getOperand(0); 449193323Sed if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) 450193323Sed return false; 451193323Sed } 452193323Sed } 453193323Sed } 454203954Srdivacky } 455193323Sed 456203954Srdivacky return true; 457203954Srdivacky} 458203954Srdivacky 459296417Sdim/// Replace the original chain operand of the call with 460205218Srdivacky/// load's chain operand and move load below the call's chain operand. 
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  // Build the new chain input for OrigChain: the load's incoming chain,
  // either directly or spliced into the existing TokenFactor.
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  // Re-chain the load directly off the call's original chain operand.
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  // Finally, make the call consume the load's output chain.
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  // Only a plain, non-volatile, unindexed, non-extending load qualifies.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->optForSize();

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only does this when target favors doesn't favor register indirect
        // call.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only does this if load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack.  This is a gross hack.  We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass.  We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}


/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    // On Cygwin/MinGW, insert a call to __main at the top of main so the
    // runtime's constructors get run.
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args), 0);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      emitSpecialCodeForMain();
}

/// Return true if \p Val is safe to use as a displacement combined with a
/// frame-index base.
static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

/// Fold Offset into AM's displacement if it is legal to do so for the
/// current code model. Returns true on failure (AM is left unchanged).
bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

/// Try to fold a load of gs:0 / fs:0 into the segment part of the addressing
/// mode. Returns false on success (segment set), true if no match.
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetLinux())
      // Address spaces 256/257 denote the GS/FS segments on x86.
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode.
These wrap things that will resolve down into a symbol reference. 732296417Sdim/// If no match is possible, this returns true, otherwise it returns false. 733296417Sdimbool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { 734195098Sed // If the addressing mode already has a symbol as the displacement, we can 735195098Sed // never match another symbol. 736193323Sed if (AM.hasSymbolicDisplacement()) 737193323Sed return true; 738193323Sed 739193323Sed SDValue N0 = N.getOperand(0); 740198090Srdivacky CodeModel::Model M = TM.getCodeModel(); 741198090Srdivacky 742195098Sed // Handle X86-64 rip-relative addresses. We check this before checking direct 743195098Sed // folding because RIP is preferable to non-RIP accesses. 744234353Sdim if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP && 745195098Sed // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 746195098Sed // they cannot be folded into immediate fields. 747195098Sed // FIXME: This can be improved for kernel and other models? 748234353Sdim (M == CodeModel::Small || M == CodeModel::Kernel)) { 749234353Sdim // Base and index reg must be 0 in order to use %rip as base. 
750234353Sdim if (AM.hasBaseOrIndexReg()) 751234353Sdim return true; 752195098Sed if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 753224145Sdim X86ISelAddressMode Backup = AM; 754195098Sed AM.GV = G->getGlobal(); 755195098Sed AM.SymbolFlags = G->getTargetFlags(); 756296417Sdim if (foldOffsetIntoAddress(G->getOffset(), AM)) { 757224145Sdim AM = Backup; 758224145Sdim return true; 759224145Sdim } 760195098Sed } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 761224145Sdim X86ISelAddressMode Backup = AM; 762195098Sed AM.CP = CP->getConstVal(); 763195098Sed AM.Align = CP->getAlignment(); 764195098Sed AM.SymbolFlags = CP->getTargetFlags(); 765296417Sdim if (foldOffsetIntoAddress(CP->getOffset(), AM)) { 766224145Sdim AM = Backup; 767224145Sdim return true; 768224145Sdim } 769195098Sed } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 770195098Sed AM.ES = S->getSymbol(); 771195098Sed AM.SymbolFlags = S->getTargetFlags(); 772288943Sdim } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) { 773288943Sdim AM.MCSym = S->getMCSymbol(); 774198892Srdivacky } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 775195098Sed AM.JT = J->getIndex(); 776195098Sed AM.SymbolFlags = J->getTargetFlags(); 777243830Sdim } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { 778243830Sdim X86ISelAddressMode Backup = AM; 779243830Sdim AM.BlockAddr = BA->getBlockAddress(); 780243830Sdim AM.SymbolFlags = BA->getTargetFlags(); 781296417Sdim if (foldOffsetIntoAddress(BA->getOffset(), AM)) { 782243830Sdim AM = Backup; 783243830Sdim return true; 784243830Sdim } 785243830Sdim } else 786243830Sdim llvm_unreachable("Unhandled symbol reference node."); 787198090Srdivacky 788195098Sed if (N.getOpcode() == X86ISD::WrapperRIP) 789195098Sed AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 790195098Sed return false; 791195098Sed } 792195098Sed 793195098Sed // Handle the case when globals fit in our immediate 
field: This is true for 794234353Sdim // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit 795234353Sdim // mode, this only applies to a non-RIP-relative computation. 796195098Sed if (!Subtarget->is64Bit() || 797234353Sdim M == CodeModel::Small || M == CodeModel::Kernel) { 798234353Sdim assert(N.getOpcode() != X86ISD::WrapperRIP && 799234353Sdim "RIP-relative addressing already handled"); 800195098Sed if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 801195098Sed AM.GV = G->getGlobal(); 802195098Sed AM.Disp += G->getOffset(); 803195098Sed AM.SymbolFlags = G->getTargetFlags(); 804195098Sed } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 805193323Sed AM.CP = CP->getConstVal(); 806193323Sed AM.Align = CP->getAlignment(); 807195098Sed AM.Disp += CP->getOffset(); 808195098Sed AM.SymbolFlags = CP->getTargetFlags(); 809195098Sed } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 810195098Sed AM.ES = S->getSymbol(); 811195098Sed AM.SymbolFlags = S->getTargetFlags(); 812288943Sdim } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) { 813288943Sdim AM.MCSym = S->getMCSymbol(); 814198892Srdivacky } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 815195098Sed AM.JT = J->getIndex(); 816195098Sed AM.SymbolFlags = J->getTargetFlags(); 817243830Sdim } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { 818243830Sdim AM.BlockAddr = BA->getBlockAddress(); 819243830Sdim AM.Disp += BA->getOffset(); 820243830Sdim AM.SymbolFlags = BA->getTargetFlags(); 821243830Sdim } else 822243830Sdim llvm_unreachable("Unhandled symbol reference node."); 823193323Sed return false; 824193323Sed } 825193323Sed 826193323Sed return true; 827193323Sed} 828193323Sed 829296417Sdim/// Add the specified node to the specified addressing mode, returning true if 830296417Sdim/// it cannot be done. This just pattern matches for the addressing mode. 
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

/// Try to fold both operands of an ADD-like node N into AM, retrying with the
/// operands commuted, and finally falling back to using the two operands as
/// plain base and index registers. Returns true if nothing could be matched.
bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  // The first recursive call may mutate the DAG and CSE away N, so every
  // subsequent operand access goes through Handle.getValue() rather than N.
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  // Only move N if it is unpositioned (id -1) or currently ordered after Pos.
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  // C1 = 8 - shift amount; must give a valid address-mode scale (1..3) and
  // the mask must be exactly 0xff shifted left by C1.
  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  // The (X >> 8) & 0xff part becomes the scaled index; the << C1 becomes
  // the address-mode scale.
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  // (Address-mode scales encode shifts of 1, 2, or 3 only.)
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a continuous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  // Rebuild as (shl (srl X, ShiftAmt + AMShiftAmt), AMShiftAmt); the outer
  // shl is then absorbed by the address-mode scale below.
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

/// Recursively pattern-match node N into the addressing mode AM. Returns
/// false if N (or part of it) was folded into AM, true if nothing more could
/// be matched. Depth bounds the recursion; past the limit N is handed to
/// matchAddressBase to be used as a plain base or index register.
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return matchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
      return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::LOCAL_RECOVER: {
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        // Use the symbol and don't prefix it.
        AM.MCSym = ESNode->getMCSymbol();
        return false;
      }
    break;
  }
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!matchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          // The addend is scaled by the shift as well.
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!foldOffsetIntoAddress(Disp, AM))
            return false;
        }

        // Offset didn't fold (or wasn't a base+constant); use the whole
        // shifted value as the index.
        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            // If the offset doesn't fit, fall back to using the whole
            // multiply input (add included) as base+index.
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          // base + index*(scale) with the same register encodes X*[3,5,9].
          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if a has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    // Heuristic cost model: positive cost means the transform is likely a
    // net loss, so bail out and restore AM.
    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    // Materialize -B as (0 - B) and use it as the index register.
    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    insertDAGNode(*CurDAG, N, Zero);
    insertDAGNode(*CurDAG, N, Neg);
    return false;
  }

  case ISD::ADD:
    if (!matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::OR:
    // We want to look through a transform in InstCombine and DAGCombiner that
    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
    // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
    // An 'lea' can then be used to match the shift (multiply) and add:
    //   and $1, %esi
    //   lea (%rsi, %rdi, 8), %rax
    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
        !matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  // No special pattern matched; fall back to using N directly as a base or
  // index register.
  return matchAddressBase(N, AM);
}

/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

/// Extract base/scale/index/disp/segment operands for a masked gather/scatter
/// node's address. The element size of the stored/loaded value becomes the
/// scale. Returns true if Parent was a gather/scatter and operands were set.
/// Note the inverted return convention relative to the match* helpers above.
bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                       SDValue &Scale, SDValue &Index,
                                       SDValue &Disp, SDValue &Segment) {

  MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
  if (!Mgs)
    return false;
  X86ISelAddressMode AM;
  unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
  // AddrSpace 256 -> GS, 257 -> FS.
  if (AddrSpace == 256)
    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
  if (AddrSpace == 257)
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);

  SDLoc DL(N);
  Base = Mgs->getBasePtr();
  Index = Mgs->getIndex();
  unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits();
  Scale = getI8Imm(ScalarSize/8, DL);

  // If Base is 0, the whole address is in index and the Scale is 1
  if (isa<ConstantSDNode>(Base)) {
    assert(cast<ConstantSDNode>(Base)->isNullValue() &&
           "Unexpected base in gather/scatter");
    Scale = getI8Imm(1, DL);
    // Register 0 denotes "no base register".
    Base = CurDAG->getRegister(0, MVT::i32);
  }
  if (AM.Segment.getNode())
    Segment = AM.Segment;
  else
    Segment = CurDAG->getRegister(0, MVT::i32);
  Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

/// Returns true if it is able to pattern match an addressing mode.
1450193323Sed/// It returns the operands which make up the maximal addressing mode it can 1451193323Sed/// match by reference. 1452218893Sdim/// 1453218893Sdim/// Parent is the parent node of the addr operand that is being matched. It 1454218893Sdim/// is always a load, store, atomic node, or null. It is only null when 1455218893Sdim/// checking memory operands for inline asm nodes. 1456296417Sdimbool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, 1457193323Sed SDValue &Scale, SDValue &Index, 1458193323Sed SDValue &Disp, SDValue &Segment) { 1459193323Sed X86ISelAddressMode AM; 1460239462Sdim 1461218893Sdim if (Parent && 1462218893Sdim // This list of opcodes are all the nodes that have an "addr:$ptr" operand 1463218893Sdim // that are not a MemSDNode, and thus don't have proper addrspace info. 1464218893Sdim Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme 1465218893Sdim Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores 1466243830Sdim Parent->getOpcode() != X86ISD::TLSCALL && // Fixme 1467243830Sdim Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp 1468243830Sdim Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp 1469218893Sdim unsigned AddrSpace = 1470218893Sdim cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace(); 1471218893Sdim // AddrSpace 256 -> GS, 257 -> FS. 
1472218893Sdim if (AddrSpace == 256) 1473218893Sdim AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); 1474218893Sdim if (AddrSpace == 257) 1475218893Sdim AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); 1476218893Sdim } 1477239462Sdim 1478296417Sdim if (matchAddress(N, AM)) 1479193323Sed return false; 1480193323Sed 1481261991Sdim MVT VT = N.getSimpleValueType(); 1482193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase) { 1483207618Srdivacky if (!AM.Base_Reg.getNode()) 1484207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, VT); 1485193323Sed } 1486193323Sed 1487193323Sed if (!AM.IndexReg.getNode()) 1488193323Sed AM.IndexReg = CurDAG->getRegister(0, VT); 1489193323Sed 1490288943Sdim getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment); 1491193323Sed return true; 1492193323Sed} 1493193323Sed 1494296417Sdim/// Match a scalar SSE load. In particular, we want to match a load whose top 1495296417Sdim/// elements are either undef or zeros. The load flavor is derived from the 1496296417Sdim/// type of N, which is either v4f32 or v2f64. 1497204642Srdivacky/// 1498204642Srdivacky/// We also return: 1499204642Srdivacky/// PatternChainNode: this is the matched node that has a chain input and 1500204642Srdivacky/// output. 
1501296417Sdimbool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, 1502193323Sed SDValue N, SDValue &Base, 1503193323Sed SDValue &Scale, SDValue &Index, 1504193323Sed SDValue &Disp, SDValue &Segment, 1505204642Srdivacky SDValue &PatternNodeWithChain) { 1506193323Sed if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1507204642Srdivacky PatternNodeWithChain = N.getOperand(0); 1508204642Srdivacky if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && 1509204642Srdivacky PatternNodeWithChain.hasOneUse() && 1510204642Srdivacky IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && 1511207618Srdivacky IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { 1512204642Srdivacky LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); 1513296417Sdim if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1514193323Sed return false; 1515193323Sed return true; 1516193323Sed } 1517193323Sed } 1518193323Sed 1519193323Sed // Also handle the case where we explicitly require zeros in the top 1520193323Sed // elements. This is a vector shuffle from the zero vector. 1521193323Sed if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1522193323Sed // Check to see if the top elements are all zeros (or bitcast of zeros). 1523239462Sdim N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1524193323Sed N.getOperand(0).getNode()->hasOneUse() && 1525193323Sed ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1526204642Srdivacky N.getOperand(0).getOperand(0).hasOneUse() && 1527204642Srdivacky IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && 1528207618Srdivacky IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { 1529193323Sed // Okay, this is a zero extending load. Fold it. 
1530193323Sed LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1531296417Sdim if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1532193323Sed return false; 1533204642Srdivacky PatternNodeWithChain = SDValue(LD, 0); 1534193323Sed return true; 1535193323Sed } 1536193323Sed return false; 1537193323Sed} 1538193323Sed 1539193323Sed 1540296417Sdimbool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { 1541261991Sdim if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1542261991Sdim uint64_t ImmVal = CN->getZExtValue(); 1543261991Sdim if ((uint32_t)ImmVal != (uint64_t)ImmVal) 1544261991Sdim return false; 1545261991Sdim 1546288943Sdim Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64); 1547261991Sdim return true; 1548261991Sdim } 1549261991Sdim 1550261991Sdim // In static codegen with small code model, we can get the address of a label 1551261991Sdim // into a register with 'movl'. TableGen has already made sure we're looking 1552261991Sdim // at a label of some kind. 
1553261991Sdim assert(N->getOpcode() == X86ISD::Wrapper && 1554261991Sdim "Unexpected node type for MOV32ri64"); 1555261991Sdim N = N.getOperand(0); 1556261991Sdim 1557261991Sdim if (N->getOpcode() != ISD::TargetConstantPool && 1558261991Sdim N->getOpcode() != ISD::TargetJumpTable && 1559261991Sdim N->getOpcode() != ISD::TargetGlobalAddress && 1560261991Sdim N->getOpcode() != ISD::TargetExternalSymbol && 1561288943Sdim N->getOpcode() != ISD::MCSymbol && 1562261991Sdim N->getOpcode() != ISD::TargetBlockAddress) 1563261991Sdim return false; 1564261991Sdim 1565261991Sdim Imm = N; 1566261991Sdim return TM.getCodeModel() == CodeModel::Small; 1567261991Sdim} 1568261991Sdim 1569296417Sdimbool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, 1570261991Sdim SDValue &Scale, SDValue &Index, 1571261991Sdim SDValue &Disp, SDValue &Segment) { 1572296417Sdim if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) 1573261991Sdim return false; 1574261991Sdim 1575261991Sdim SDLoc DL(N); 1576261991Sdim RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base); 1577261991Sdim if (RN && RN->getReg() == 0) 1578261991Sdim Base = CurDAG->getRegister(0, MVT::i64); 1579280031Sdim else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) { 1580261991Sdim // Base could already be %rip, particularly in the x32 ABI. 
1581261991Sdim Base = SDValue(CurDAG->getMachineNode( 1582261991Sdim TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, 1583288943Sdim CurDAG->getTargetConstant(0, DL, MVT::i64), 1584261991Sdim Base, 1585288943Sdim CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)), 1586261991Sdim 0); 1587261991Sdim } 1588261991Sdim 1589261991Sdim RN = dyn_cast<RegisterSDNode>(Index); 1590261991Sdim if (RN && RN->getReg() == 0) 1591261991Sdim Index = CurDAG->getRegister(0, MVT::i64); 1592261991Sdim else { 1593261991Sdim assert(Index.getValueType() == MVT::i32 && 1594261991Sdim "Expect to be extending 32-bit registers for use in LEA"); 1595261991Sdim Index = SDValue(CurDAG->getMachineNode( 1596261991Sdim TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, 1597288943Sdim CurDAG->getTargetConstant(0, DL, MVT::i64), 1598261991Sdim Index, 1599288943Sdim CurDAG->getTargetConstant(X86::sub_32bit, DL, 1600288943Sdim MVT::i32)), 1601261991Sdim 0); 1602261991Sdim } 1603261991Sdim 1604261991Sdim return true; 1605261991Sdim} 1606261991Sdim 1607296417Sdim/// Calls SelectAddr and determines if the maximal addressing 1608193323Sed/// mode it matches can be cost effectively emitted as an LEA instruction. 1609296417Sdimbool X86DAGToDAGISel::selectLEAAddr(SDValue N, 1610193323Sed SDValue &Base, SDValue &Scale, 1611210299Sed SDValue &Index, SDValue &Disp, 1612210299Sed SDValue &Segment) { 1613193323Sed X86ISelAddressMode AM; 1614193323Sed 1615193323Sed // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support 1616193323Sed // segments. 
1617193323Sed SDValue Copy = AM.Segment; 1618193323Sed SDValue T = CurDAG->getRegister(0, MVT::i32); 1619193323Sed AM.Segment = T; 1620296417Sdim if (matchAddress(N, AM)) 1621193323Sed return false; 1622193323Sed assert (T == AM.Segment); 1623193323Sed AM.Segment = Copy; 1624193323Sed 1625261991Sdim MVT VT = N.getSimpleValueType(); 1626193323Sed unsigned Complexity = 0; 1627193323Sed if (AM.BaseType == X86ISelAddressMode::RegBase) 1628207618Srdivacky if (AM.Base_Reg.getNode()) 1629193323Sed Complexity = 1; 1630193323Sed else 1631207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, VT); 1632193323Sed else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1633193323Sed Complexity = 4; 1634193323Sed 1635193323Sed if (AM.IndexReg.getNode()) 1636193323Sed Complexity++; 1637193323Sed else 1638193323Sed AM.IndexReg = CurDAG->getRegister(0, VT); 1639193323Sed 1640193323Sed // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1641193323Sed // a simple shift. 1642193323Sed if (AM.Scale > 1) 1643193323Sed Complexity++; 1644193323Sed 1645193323Sed // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1646296417Sdim // to a LEA. This is determined with some experimentation but is by no means 1647193323Sed // optimal (especially for code size consideration). LEA is nice because of 1648193323Sed // its three-address nature. Tweak the cost function again when we can run 1649193323Sed // convertToThreeAddress() at register allocation time. 1650193323Sed if (AM.hasSymbolicDisplacement()) { 1651296417Sdim // For X86-64, always use LEA to materialize RIP-relative addresses. 1652193323Sed if (Subtarget->is64Bit()) 1653193323Sed Complexity = 4; 1654193323Sed else 1655193323Sed Complexity += 2; 1656193323Sed } 1657193323Sed 1658207618Srdivacky if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode())) 1659193323Sed Complexity++; 1660193323Sed 1661198090Srdivacky // If it isn't worth using an LEA, reject it. 
1662198090Srdivacky if (Complexity <= 2) 1663198090Srdivacky return false; 1664239462Sdim 1665288943Sdim getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment); 1666198090Srdivacky return true; 1667193323Sed} 1668193323Sed 1669296417Sdim/// This is only run on TargetGlobalTLSAddress nodes. 1670296417Sdimbool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, 1671194612Sed SDValue &Scale, SDValue &Index, 1672210299Sed SDValue &Disp, SDValue &Segment) { 1673194612Sed assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1674194612Sed const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1675239462Sdim 1676194612Sed X86ISelAddressMode AM; 1677194612Sed AM.GV = GA->getGlobal(); 1678194612Sed AM.Disp += GA->getOffset(); 1679207618Srdivacky AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); 1680195098Sed AM.SymbolFlags = GA->getTargetFlags(); 1681195098Sed 1682194612Sed if (N.getValueType() == MVT::i32) { 1683194612Sed AM.Scale = 1; 1684194612Sed AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1685194612Sed } else { 1686194612Sed AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1687194612Sed } 1688239462Sdim 1689288943Sdim getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment); 1690194612Sed return true; 1691194612Sed} 1692194612Sed 1693194612Sed 1694296417Sdimbool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N, 1695193323Sed SDValue &Base, SDValue &Scale, 1696193323Sed SDValue &Index, SDValue &Disp, 1697193323Sed SDValue &Segment) { 1698204642Srdivacky if (!ISD::isNON_EXTLoad(N.getNode()) || 1699204642Srdivacky !IsProfitableToFold(N, P, P) || 1700207618Srdivacky !IsLegalToFold(N, P, P, OptLevel)) 1701204642Srdivacky return false; 1702239462Sdim 1703296417Sdim return selectAddr(N.getNode(), 1704218893Sdim N.getOperand(1), Base, Scale, Index, Disp, Segment); 1705193323Sed} 1706193323Sed 1707296417Sdim/// Return an SDNode that returns the value of the global base register. 
1708296417Sdim/// Output instructions required to initialize the global base register, 1709296417Sdim/// if necessary. 1710193323SedSDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1711193399Sed unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 1712288943Sdim auto &DL = MF->getDataLayout(); 1713288943Sdim return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); 1714193323Sed} 1715193323Sed 1716243830Sdim/// Atomic opcode table 1717243830Sdim/// 1718223017Sdimenum AtomicOpc { 1719243830Sdim ADD, 1720243830Sdim SUB, 1721243830Sdim INC, 1722243830Sdim DEC, 1723223017Sdim OR, 1724223017Sdim AND, 1725223017Sdim XOR, 1726223017Sdim AtomicOpcEnd 1727223017Sdim}; 1728223017Sdim 1729223017Sdimenum AtomicSz { 1730223017Sdim ConstantI8, 1731223017Sdim I8, 1732223017Sdim SextConstantI16, 1733223017Sdim ConstantI16, 1734223017Sdim I16, 1735223017Sdim SextConstantI32, 1736223017Sdim ConstantI32, 1737223017Sdim I32, 1738223017Sdim SextConstantI64, 1739223017Sdim ConstantI64, 1740223017Sdim I64, 1741223017Sdim AtomicSzEnd 1742223017Sdim}; 1743223017Sdim 1744234353Sdimstatic const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { 1745223017Sdim { 1746243830Sdim X86::LOCK_ADD8mi, 1747243830Sdim X86::LOCK_ADD8mr, 1748243830Sdim X86::LOCK_ADD16mi8, 1749243830Sdim X86::LOCK_ADD16mi, 1750243830Sdim X86::LOCK_ADD16mr, 1751243830Sdim X86::LOCK_ADD32mi8, 1752243830Sdim X86::LOCK_ADD32mi, 1753243830Sdim X86::LOCK_ADD32mr, 1754243830Sdim X86::LOCK_ADD64mi8, 1755243830Sdim X86::LOCK_ADD64mi32, 1756243830Sdim X86::LOCK_ADD64mr, 1757243830Sdim }, 1758243830Sdim { 1759243830Sdim X86::LOCK_SUB8mi, 1760243830Sdim X86::LOCK_SUB8mr, 1761243830Sdim X86::LOCK_SUB16mi8, 1762243830Sdim X86::LOCK_SUB16mi, 1763243830Sdim X86::LOCK_SUB16mr, 1764243830Sdim X86::LOCK_SUB32mi8, 1765243830Sdim X86::LOCK_SUB32mi, 1766243830Sdim X86::LOCK_SUB32mr, 1767243830Sdim X86::LOCK_SUB64mi8, 1768243830Sdim X86::LOCK_SUB64mi32, 1769243830Sdim X86::LOCK_SUB64mr, 1770243830Sdim }, 
1771243830Sdim { 1772243830Sdim 0, 1773243830Sdim X86::LOCK_INC8m, 1774243830Sdim 0, 1775243830Sdim 0, 1776243830Sdim X86::LOCK_INC16m, 1777243830Sdim 0, 1778243830Sdim 0, 1779243830Sdim X86::LOCK_INC32m, 1780243830Sdim 0, 1781243830Sdim 0, 1782243830Sdim X86::LOCK_INC64m, 1783243830Sdim }, 1784243830Sdim { 1785243830Sdim 0, 1786243830Sdim X86::LOCK_DEC8m, 1787243830Sdim 0, 1788243830Sdim 0, 1789243830Sdim X86::LOCK_DEC16m, 1790243830Sdim 0, 1791243830Sdim 0, 1792243830Sdim X86::LOCK_DEC32m, 1793243830Sdim 0, 1794243830Sdim 0, 1795243830Sdim X86::LOCK_DEC64m, 1796243830Sdim }, 1797243830Sdim { 1798223017Sdim X86::LOCK_OR8mi, 1799223017Sdim X86::LOCK_OR8mr, 1800223017Sdim X86::LOCK_OR16mi8, 1801223017Sdim X86::LOCK_OR16mi, 1802223017Sdim X86::LOCK_OR16mr, 1803223017Sdim X86::LOCK_OR32mi8, 1804223017Sdim X86::LOCK_OR32mi, 1805223017Sdim X86::LOCK_OR32mr, 1806223017Sdim X86::LOCK_OR64mi8, 1807223017Sdim X86::LOCK_OR64mi32, 1808243830Sdim X86::LOCK_OR64mr, 1809223017Sdim }, 1810223017Sdim { 1811223017Sdim X86::LOCK_AND8mi, 1812223017Sdim X86::LOCK_AND8mr, 1813223017Sdim X86::LOCK_AND16mi8, 1814223017Sdim X86::LOCK_AND16mi, 1815223017Sdim X86::LOCK_AND16mr, 1816223017Sdim X86::LOCK_AND32mi8, 1817223017Sdim X86::LOCK_AND32mi, 1818223017Sdim X86::LOCK_AND32mr, 1819223017Sdim X86::LOCK_AND64mi8, 1820223017Sdim X86::LOCK_AND64mi32, 1821243830Sdim X86::LOCK_AND64mr, 1822223017Sdim }, 1823223017Sdim { 1824223017Sdim X86::LOCK_XOR8mi, 1825223017Sdim X86::LOCK_XOR8mr, 1826223017Sdim X86::LOCK_XOR16mi8, 1827223017Sdim X86::LOCK_XOR16mi, 1828223017Sdim X86::LOCK_XOR16mr, 1829223017Sdim X86::LOCK_XOR32mi8, 1830223017Sdim X86::LOCK_XOR32mi, 1831223017Sdim X86::LOCK_XOR32mr, 1832223017Sdim X86::LOCK_XOR64mi8, 1833223017Sdim X86::LOCK_XOR64mi32, 1834243830Sdim X86::LOCK_XOR64mr, 1835223017Sdim } 1836223017Sdim}; 1837223017Sdim 1838243830Sdim// Return the target constant operand for atomic-load-op and do simple 1839243830Sdim// translations, such as from atomic-load-add to lock-sub. 
The return value is 1840243830Sdim// one of the following 3 cases: 1841243830Sdim// + target-constant, the operand could be supported as a target constant. 1842243830Sdim// + empty, the operand is not needed any more with the new op selected. 1843243830Sdim// + non-empty, otherwise. 1844243830Sdimstatic SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, 1845261991Sdim SDLoc dl, 1846261991Sdim enum AtomicOpc &Op, MVT NVT, 1847280031Sdim SDValue Val, 1848280031Sdim const X86Subtarget *Subtarget) { 1849243830Sdim if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) { 1850243830Sdim int64_t CNVal = CN->getSExtValue(); 1851243830Sdim // Quit if not 32-bit imm. 1852243830Sdim if ((int32_t)CNVal != CNVal) 1853243830Sdim return Val; 1854280031Sdim // Quit if INT32_MIN: it would be negated as it is negative and overflow, 1855280031Sdim // producing an immediate that does not fit in the 32 bits available for 1856280031Sdim // an immediate operand to sub. However, it still fits in 32 bits for the 1857280031Sdim // add (since it is not negated) so we can return target-constant. 1858280031Sdim if (CNVal == INT32_MIN) 1859288943Sdim return CurDAG->getTargetConstant(CNVal, dl, NVT); 1860243830Sdim // For atomic-load-add, we could do some optimizations. 1861243830Sdim if (Op == ADD) { 1862243830Sdim // Translate to INC/DEC if ADD by 1 or -1. 1863280031Sdim if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) { 1864243830Sdim Op = (CNVal == 1) ? INC : DEC; 1865243830Sdim // No more constant operand after being translated into INC/DEC. 1866243830Sdim return SDValue(); 1867243830Sdim } 1868243830Sdim // Translate to SUB if ADD by negative value. 1869243830Sdim if (CNVal < 0) { 1870243830Sdim Op = SUB; 1871243830Sdim CNVal = -CNVal; 1872243830Sdim } 1873243830Sdim } 1874288943Sdim return CurDAG->getTargetConstant(CNVal, dl, NVT); 1875243830Sdim } 1876243830Sdim 1877243830Sdim // If the value operand is single-used, try to optimize it. 
1878243830Sdim if (Op == ADD && Val.hasOneUse()) { 1879243830Sdim // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). 1880243830Sdim if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { 1881243830Sdim Op = SUB; 1882243830Sdim return Val.getOperand(1); 1883243830Sdim } 1884243830Sdim // A special case for i16, which needs truncating as, in most cases, it's 1885243830Sdim // promoted to i32. We will translate 1886243830Sdim // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) 1887243830Sdim if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && 1888243830Sdim Val.getOperand(0).getOpcode() == ISD::SUB && 1889243830Sdim X86::isZeroNode(Val.getOperand(0).getOperand(0))) { 1890243830Sdim Op = SUB; 1891243830Sdim Val = Val.getOperand(0); 1892243830Sdim return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, 1893243830Sdim Val.getOperand(1)); 1894243830Sdim } 1895243830Sdim } 1896243830Sdim 1897243830Sdim return Val; 1898243830Sdim} 1899243830Sdim 1900296417SdimSDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) { 1901223017Sdim if (Node->hasAnyUseOfValue(0)) 1902276479Sdim return nullptr; 1903239462Sdim 1904261991Sdim SDLoc dl(Node); 1905243830Sdim 1906223017Sdim // Optimize common patterns for __sync_or_and_fetch and similar arith 1907223017Sdim // operations where the result is not used. This allows us to use the "lock" 1908223017Sdim // version of the arithmetic instruction. 1909223017Sdim SDValue Chain = Node->getOperand(0); 1910223017Sdim SDValue Ptr = Node->getOperand(1); 1911223017Sdim SDValue Val = Node->getOperand(2); 1912280031Sdim SDValue Base, Scale, Index, Disp, Segment; 1913296417Sdim if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) 1914276479Sdim return nullptr; 1915223017Sdim 1916223017Sdim // Which index into the table. 
1917223017Sdim enum AtomicOpc Op; 1918223017Sdim switch (Node->getOpcode()) { 1919243830Sdim default: 1920276479Sdim return nullptr; 1921223017Sdim case ISD::ATOMIC_LOAD_OR: 1922223017Sdim Op = OR; 1923223017Sdim break; 1924223017Sdim case ISD::ATOMIC_LOAD_AND: 1925223017Sdim Op = AND; 1926223017Sdim break; 1927223017Sdim case ISD::ATOMIC_LOAD_XOR: 1928223017Sdim Op = XOR; 1929223017Sdim break; 1930243830Sdim case ISD::ATOMIC_LOAD_ADD: 1931243830Sdim Op = ADD; 1932243830Sdim break; 1933223017Sdim } 1934251662Sdim 1935280031Sdim Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget); 1936243830Sdim bool isUnOp = !Val.getNode(); 1937243830Sdim bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); 1938239462Sdim 1939223017Sdim unsigned Opc = 0; 1940261991Sdim switch (NVT.SimpleTy) { 1941276479Sdim default: return nullptr; 1942223017Sdim case MVT::i8: 1943223017Sdim if (isCN) 1944223017Sdim Opc = AtomicOpcTbl[Op][ConstantI8]; 1945223017Sdim else 1946223017Sdim Opc = AtomicOpcTbl[Op][I8]; 1947223017Sdim break; 1948223017Sdim case MVT::i16: 1949223017Sdim if (isCN) { 1950223017Sdim if (immSext8(Val.getNode())) 1951223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI16]; 1952223017Sdim else 1953223017Sdim Opc = AtomicOpcTbl[Op][ConstantI16]; 1954223017Sdim } else 1955223017Sdim Opc = AtomicOpcTbl[Op][I16]; 1956223017Sdim break; 1957223017Sdim case MVT::i32: 1958223017Sdim if (isCN) { 1959223017Sdim if (immSext8(Val.getNode())) 1960223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI32]; 1961223017Sdim else 1962223017Sdim Opc = AtomicOpcTbl[Op][ConstantI32]; 1963223017Sdim } else 1964223017Sdim Opc = AtomicOpcTbl[Op][I32]; 1965223017Sdim break; 1966223017Sdim case MVT::i64: 1967223017Sdim if (isCN) { 1968223017Sdim if (immSext8(Val.getNode())) 1969223017Sdim Opc = AtomicOpcTbl[Op][SextConstantI64]; 1970223017Sdim else if (i64immSExt32(Val.getNode())) 1971223017Sdim Opc = AtomicOpcTbl[Op][ConstantI64]; 1972280031Sdim else 1973280031Sdim 
llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith"); 1974280031Sdim } else 1975280031Sdim Opc = AtomicOpcTbl[Op][I64]; 1976223017Sdim break; 1977223017Sdim } 1978239462Sdim 1979224145Sdim assert(Opc != 0 && "Invalid arith lock transform!"); 1980224145Sdim 1981280031Sdim // Building the new node. 1982243830Sdim SDValue Ret; 1983243830Sdim if (isUnOp) { 1984280031Sdim SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain }; 1985251662Sdim Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 1986243830Sdim } else { 1987280031Sdim SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain }; 1988251662Sdim Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 1989243830Sdim } 1990280031Sdim 1991280031Sdim // Copying the MachineMemOperand. 1992280031Sdim MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1993280031Sdim MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1994223017Sdim cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1995280031Sdim 1996280031Sdim // We need to have two outputs as that is what the original instruction had. 1997280031Sdim // So we add a dummy, undefined output. This is safe as we checked first 1998280031Sdim // that no-one uses our output anyway. 1999280031Sdim SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 2000280031Sdim dl, NVT), 0); 2001223017Sdim SDValue RetVals[] = { Undef, Ret }; 2002276479Sdim return CurDAG->getMergeValues(RetVals, dl).getNode(); 2003223017Sdim} 2004223017Sdim 2005296417Sdim/// Test whether the given X86ISD::CMP node has any uses which require the SF 2006296417Sdim/// or OF bits to be accurate. 2007296417Sdimstatic bool hasNoSignedComparisonUses(SDNode *N) { 2008198090Srdivacky // Examine each user of the node. 2009198090Srdivacky for (SDNode::use_iterator UI = N->use_begin(), 2010198090Srdivacky UE = N->use_end(); UI != UE; ++UI) { 2011198090Srdivacky // Only examine CopyToReg uses. 
2012198090Srdivacky if (UI->getOpcode() != ISD::CopyToReg) 2013198090Srdivacky return false; 2014198090Srdivacky // Only examine CopyToReg uses that copy to EFLAGS. 2015198090Srdivacky if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != 2016198090Srdivacky X86::EFLAGS) 2017198090Srdivacky return false; 2018198090Srdivacky // Examine each user of the CopyToReg use. 2019198090Srdivacky for (SDNode::use_iterator FlagUI = UI->use_begin(), 2020198090Srdivacky FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { 2021198090Srdivacky // Only examine the Flag result. 2022198090Srdivacky if (FlagUI.getUse().getResNo() != 1) continue; 2023198090Srdivacky // Anything unusual: assume conservatively. 2024198090Srdivacky if (!FlagUI->isMachineOpcode()) return false; 2025198090Srdivacky // Examine the opcode of the user. 2026198090Srdivacky switch (FlagUI->getMachineOpcode()) { 2027198090Srdivacky // These comparisons don't treat the most significant bit specially. 2028198090Srdivacky case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: 2029198090Srdivacky case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: 2030198090Srdivacky case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: 2031198090Srdivacky case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: 2032280031Sdim case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1: 2033280031Sdim case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1: 2034198090Srdivacky case X86::CMOVA16rr: case X86::CMOVA16rm: 2035198090Srdivacky case X86::CMOVA32rr: case X86::CMOVA32rm: 2036198090Srdivacky case X86::CMOVA64rr: case X86::CMOVA64rm: 2037198090Srdivacky case X86::CMOVAE16rr: case X86::CMOVAE16rm: 2038198090Srdivacky case X86::CMOVAE32rr: case X86::CMOVAE32rm: 2039198090Srdivacky case X86::CMOVAE64rr: case X86::CMOVAE64rm: 2040198090Srdivacky case X86::CMOVB16rr: case X86::CMOVB16rm: 2041198090Srdivacky case X86::CMOVB32rr: case X86::CMOVB32rm: 
2042198090Srdivacky case X86::CMOVB64rr: case X86::CMOVB64rm: 2043198090Srdivacky case X86::CMOVBE16rr: case X86::CMOVBE16rm: 2044198090Srdivacky case X86::CMOVBE32rr: case X86::CMOVBE32rm: 2045198090Srdivacky case X86::CMOVBE64rr: case X86::CMOVBE64rm: 2046198090Srdivacky case X86::CMOVE16rr: case X86::CMOVE16rm: 2047198090Srdivacky case X86::CMOVE32rr: case X86::CMOVE32rm: 2048198090Srdivacky case X86::CMOVE64rr: case X86::CMOVE64rm: 2049198090Srdivacky case X86::CMOVNE16rr: case X86::CMOVNE16rm: 2050198090Srdivacky case X86::CMOVNE32rr: case X86::CMOVNE32rm: 2051198090Srdivacky case X86::CMOVNE64rr: case X86::CMOVNE64rm: 2052198090Srdivacky case X86::CMOVNP16rr: case X86::CMOVNP16rm: 2053198090Srdivacky case X86::CMOVNP32rr: case X86::CMOVNP32rm: 2054198090Srdivacky case X86::CMOVNP64rr: case X86::CMOVNP64rm: 2055198090Srdivacky case X86::CMOVP16rr: case X86::CMOVP16rm: 2056198090Srdivacky case X86::CMOVP32rr: case X86::CMOVP32rm: 2057198090Srdivacky case X86::CMOVP64rr: case X86::CMOVP64rm: 2058198090Srdivacky continue; 2059198090Srdivacky // Anything else: assume conservatively. 2060198090Srdivacky default: return false; 2061198090Srdivacky } 2062198090Srdivacky } 2063198090Srdivacky } 2064198090Srdivacky return true; 2065198090Srdivacky} 2066198090Srdivacky 2067296417Sdim/// Check whether or not the chain ending in StoreNode is suitable for doing 2068296417Sdim/// the {load; increment or decrement; store} to modify transformation. 2069239462Sdimstatic bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, 2070234353Sdim SDValue StoredVal, SelectionDAG *CurDAG, 2071234353Sdim LoadSDNode* &LoadNode, SDValue &InputChain) { 2072234353Sdim 2073234353Sdim // is the value stored the result of a DEC or INC? 2074234353Sdim if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; 2075234353Sdim 2076234353Sdim // is the stored value result 0 of the load? 
2077234353Sdim if (StoredVal.getResNo() != 0) return false; 2078234353Sdim 2079234353Sdim // are there other uses of the loaded value than the inc or dec? 2080234353Sdim if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; 2081234353Sdim 2082234353Sdim // is the store non-extending and non-indexed? 2083234353Sdim if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) 2084234353Sdim return false; 2085234353Sdim 2086234353Sdim SDValue Load = StoredVal->getOperand(0); 2087234353Sdim // Is the stored value a non-extending and non-indexed load? 2088234353Sdim if (!ISD::isNormalLoad(Load.getNode())) return false; 2089234353Sdim 2090234353Sdim // Return LoadNode by reference. 2091234353Sdim LoadNode = cast<LoadSDNode>(Load); 2092234353Sdim // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) 2093239462Sdim EVT LdVT = LoadNode->getMemoryVT(); 2094239462Sdim if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && 2095234353Sdim LdVT != MVT::i8) 2096234353Sdim return false; 2097234353Sdim 2098234353Sdim // Is store the only read of the loaded value? 2099234353Sdim if (!Load.hasOneUse()) 2100234353Sdim return false; 2101239462Sdim 2102234353Sdim // Is the address of the store the same as the load? 2103234353Sdim if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || 2104234353Sdim LoadNode->getOffset() != StoreNode->getOffset()) 2105234353Sdim return false; 2106234353Sdim 2107234353Sdim // Check if the chain is produced by the load or is a TokenFactor with 2108234353Sdim // the load output chain as an operand. Return InputChain by reference. 
2109234353Sdim SDValue Chain = StoreNode->getChain(); 2110234353Sdim 2111234353Sdim bool ChainCheck = false; 2112234353Sdim if (Chain == Load.getValue(1)) { 2113234353Sdim ChainCheck = true; 2114234353Sdim InputChain = LoadNode->getChain(); 2115234353Sdim } else if (Chain.getOpcode() == ISD::TokenFactor) { 2116234353Sdim SmallVector<SDValue, 4> ChainOps; 2117234353Sdim for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { 2118234353Sdim SDValue Op = Chain.getOperand(i); 2119234353Sdim if (Op == Load.getValue(1)) { 2120234353Sdim ChainCheck = true; 2121234353Sdim continue; 2122234353Sdim } 2123239462Sdim 2124239462Sdim // Make sure using Op as part of the chain would not cause a cycle here. 2125239462Sdim // In theory, we could check whether the chain node is a predecessor of 2126239462Sdim // the load. But that can be very expensive. Instead visit the uses and 2127239462Sdim // make sure they all have smaller node id than the load. 2128239462Sdim int LoadId = LoadNode->getNodeId(); 2129239462Sdim for (SDNode::use_iterator UI = Op.getNode()->use_begin(), 2130239462Sdim UE = UI->use_end(); UI != UE; ++UI) { 2131239462Sdim if (UI.getUse().getResNo() != 0) 2132239462Sdim continue; 2133239462Sdim if (UI->getNodeId() > LoadId) 2134239462Sdim return false; 2135239462Sdim } 2136239462Sdim 2137234353Sdim ChainOps.push_back(Op); 2138234353Sdim } 2139234353Sdim 2140234353Sdim if (ChainCheck) 2141234353Sdim // Make a new TokenFactor with all the other input chains except 2142234353Sdim // for the load. 2143261991Sdim InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), 2144276479Sdim MVT::Other, ChainOps); 2145234353Sdim } 2146234353Sdim if (!ChainCheck) 2147234353Sdim return false; 2148234353Sdim 2149234353Sdim return true; 2150234353Sdim} 2151234353Sdim 2152296417Sdim/// Get the appropriate X86 opcode for an in-memory increment or decrement. 2153296417Sdim/// Opc should be X86ISD::DEC or X86ISD::INC. 
// NOTE(review): this region is an SVN-annotate dump; each original source line is
// fused with its revision/author prefix (e.g. "2154234353Sdim"), so the text below
// is readable but not directly compilable. Code content left byte-identical.
// getFusedLdStOpcode: maps X86ISD::DEC/INC plus the loaded width (i64/i32/i16/i8)
// to the matching read-modify-write memory opcode (DEC*m / INC*m); any other
// width hits llvm_unreachable.
// selectGather (first half): unpacks the gather operands (VSrc, Base, VIdx,
// VMask, Scale), bails out (returns nullptr) when Scale is not a constant, and
// builds the machine gather node with memory operands
// {Base, Scale, Index=VIdx, Disp=0, Segment=0}.
2154234353Sdimstatic unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { 2155234353Sdim if (Opc == X86ISD::DEC) { 2156234353Sdim if (LdVT == MVT::i64) return X86::DEC64m; 2157234353Sdim if (LdVT == MVT::i32) return X86::DEC32m; 2158234353Sdim if (LdVT == MVT::i16) return X86::DEC16m; 2159234353Sdim if (LdVT == MVT::i8) return X86::DEC8m; 2160234353Sdim } else { 2161234353Sdim assert(Opc == X86ISD::INC && "unrecognized opcode"); 2162234353Sdim if (LdVT == MVT::i64) return X86::INC64m; 2163234353Sdim if (LdVT == MVT::i32) return X86::INC32m; 2164234353Sdim if (LdVT == MVT::i16) return X86::INC16m; 2165234353Sdim if (LdVT == MVT::i8) return X86::INC8m; 2166234353Sdim } 2167234353Sdim llvm_unreachable("unrecognized size for LdVT"); 2168234353Sdim} 2169234353Sdim 2170296417Sdim/// Customized ISel for GATHER operations. 2171296417SdimSDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) { 2172239462Sdim // Operands of Gather: VSrc, Base, VIdx, VMask, Scale 2173239462Sdim SDValue Chain = Node->getOperand(0); 2174239462Sdim SDValue VSrc = Node->getOperand(2); 2175239462Sdim SDValue Base = Node->getOperand(3); 2176239462Sdim SDValue VIdx = Node->getOperand(4); 2177239462Sdim SDValue VMask = Node->getOperand(5); 2178239462Sdim ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); 2179239462Sdim if (!Scale) 2180276479Sdim return nullptr; 2181239462Sdim 2182239462Sdim SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), 2183239462Sdim MVT::Other); 2184239462Sdim 2185288943Sdim SDLoc DL(Node); 2186288943Sdim 2187239462Sdim // Memory Operands: Base, Scale, Index, Disp, Segment 2188288943Sdim SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32); 2189239462Sdim SDValue Segment = CurDAG->getRegister(0, MVT::i32); 2190288943Sdim const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx, 2191239462Sdim Disp, Segment, VMask, Chain}; 2192288943Sdim SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// NOTE(review): SVN-annotate residue — source lines fused with blame prefixes;
// code content below left byte-identical.
// selectGather (tail): the machine gather node has 3 results
// (VDst, VMask_wb, chain) while the original intrinsic node has 2 (VDst, chain),
// so result 0 maps to result 0 and result 1 maps to result 2; ReplaceUses is
// already called here, which is why callers return nullptr on success.
// Then the opening of X86DAGToDAGISel::Select: already-selected machine nodes
// are marked (setNodeId(-1)) and skipped; the opcode switch begins with the
// ISD::BRIND case (NaCl targets are left alone; x32/ILP32 handling follows).
2193239462Sdim // Node has 2 outputs: VDst and MVT::Other. 2194239462Sdim // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. 2195239462Sdim // We replace VDst of Node with VDst of ResNode, and Other of Node with Other 2196239462Sdim // of ResNode. 2197239462Sdim ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); 2198239462Sdim ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2)); 2199239462Sdim return ResNode; 2200239462Sdim} 2201239462Sdim 2202202375SrdivackySDNode *X86DAGToDAGISel::Select(SDNode *Node) { 2203261991Sdim MVT NVT = Node->getSimpleValueType(0); 2204193323Sed unsigned Opc, MOpc; 2205193323Sed unsigned Opcode = Node->getOpcode(); 2206261991Sdim SDLoc dl(Node); 2207239462Sdim 2208204642Srdivacky DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); 2209193323Sed 2210193323Sed if (Node->isMachineOpcode()) { 2211204642Srdivacky DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); 2212255804Sdim Node->setNodeId(-1); 2213276479Sdim return nullptr; // Already selected. 2214193323Sed } 2215193323Sed 2216193323Sed switch (Opcode) { 2217198090Srdivacky default: break; 2218296417Sdim case ISD::BRIND: { 2219296417Sdim if (Subtarget->isTargetNaCl()) 2220296417Sdim // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We 2221296417Sdim // leave the instruction alone. 2222296417Sdim break; 2223296417Sdim if (Subtarget->isTarget64BitILP32()) { 2224296417Sdim // Converts a 32-bit register to a 64-bit, zero-extended version of 2225296417Sdim // it. This is needed because x86-64 can do many things, but jmp %r32 2226296417Sdim // ain't one of them.
2227296417Sdim const SDValue &Target = Node->getOperand(1); 2228296417Sdim assert(Target.getSimpleValueType() == llvm::MVT::i32); 2229296417Sdim SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64)); 2230296417Sdim SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other, 2231296417Sdim Node->getOperand(0), ZextTarget); 2232296417Sdim ReplaceUses(SDValue(Node, 0), Brind); 2233296417Sdim SelectCode(ZextTarget.getNode()); 2234296417Sdim SelectCode(Brind.getNode()); 2235296417Sdim return nullptr; 2236296417Sdim } 2237296417Sdim break; 2238296417Sdim } 2239239462Sdim case ISD::INTRINSIC_W_CHAIN: { 2240239462Sdim unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2241239462Sdim switch (IntNo) { 2242239462Sdim default: break; 2243239462Sdim case Intrinsic::x86_avx2_gather_d_pd: 2244239462Sdim case Intrinsic::x86_avx2_gather_d_pd_256: 2245239462Sdim case Intrinsic::x86_avx2_gather_q_pd: 2246239462Sdim case Intrinsic::x86_avx2_gather_q_pd_256: 2247239462Sdim case Intrinsic::x86_avx2_gather_d_ps: 2248239462Sdim case Intrinsic::x86_avx2_gather_d_ps_256: 2249239462Sdim case Intrinsic::x86_avx2_gather_q_ps: 2250239462Sdim case Intrinsic::x86_avx2_gather_q_ps_256: 2251239462Sdim case Intrinsic::x86_avx2_gather_d_q: 2252239462Sdim case Intrinsic::x86_avx2_gather_d_q_256: 2253239462Sdim case Intrinsic::x86_avx2_gather_q_q: 2254239462Sdim case Intrinsic::x86_avx2_gather_q_q_256: 2255239462Sdim case Intrinsic::x86_avx2_gather_d_d: 2256239462Sdim case Intrinsic::x86_avx2_gather_d_d_256: 2257239462Sdim case Intrinsic::x86_avx2_gather_q_d: 2258239462Sdim case Intrinsic::x86_avx2_gather_q_d_256: { 2259261991Sdim if (!Subtarget->hasAVX2()) 2260261991Sdim break; 2261239462Sdim unsigned Opc; 2262239462Sdim switch (IntNo) { 2263239462Sdim default: llvm_unreachable("Impossible intrinsic"); 2264239462Sdim case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; 2265239462Sdim case Intrinsic::x86_avx2_gather_d_pd_256: Opc = 
X86::VGATHERDPDYrm; break; 2266239462Sdim case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; 2267239462Sdim case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; 2268239462Sdim case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; 2269239462Sdim case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; 2270239462Sdim case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; 2271239462Sdim case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; 2272239462Sdim case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break; 2273239462Sdim case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; 2274239462Sdim case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; 2275239462Sdim case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; 2276239462Sdim case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; 2277239462Sdim case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; 2278239462Sdim case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; 2279239462Sdim case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; 2280239462Sdim } 2281296417Sdim SDNode *RetVal = selectGather(Node, Opc); 2282239462Sdim if (RetVal) 2283239462Sdim // We already called ReplaceUses inside SelectGather. 2284276479Sdim return nullptr; 2285239462Sdim break; 2286239462Sdim } 2287239462Sdim } 2288239462Sdim break; 2289239462Sdim } 2290198090Srdivacky case X86ISD::GlobalBaseReg: 2291198090Srdivacky return getGlobalBaseReg(); 2292193323Sed 2293280031Sdim case X86ISD::SHRUNKBLEND: { 2294280031Sdim // SHRUNKBLEND selects like a regular VSELECT. 
2295280031Sdim SDValue VSelect = CurDAG->getNode( 2296280031Sdim ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0), 2297280031Sdim Node->getOperand(1), Node->getOperand(2)); 2298280031Sdim ReplaceUses(SDValue(Node, 0), VSelect); 2299280031Sdim SelectCode(VSelect.getNode()); 2300280031Sdim // We already called ReplaceUses. 2301280031Sdim return nullptr; 2302280031Sdim } 2303239462Sdim 2304223017Sdim case ISD::ATOMIC_LOAD_XOR: 2305223017Sdim case ISD::ATOMIC_LOAD_AND: 2306243830Sdim case ISD::ATOMIC_LOAD_OR: 2307243830Sdim case ISD::ATOMIC_LOAD_ADD: { 2308296417Sdim SDNode *RetVal = selectAtomicLoadArith(Node, NVT); 2309223017Sdim if (RetVal) 2310223017Sdim return RetVal; 2311223017Sdim break; 2312223017Sdim } 2313221345Sdim case ISD::AND: 2314221345Sdim case ISD::OR: 2315221345Sdim case ISD::XOR: { 2316221345Sdim // For operations of the form (x << C1) op C2, check if we can use a smaller 2317221345Sdim // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. 2318221345Sdim SDValue N0 = Node->getOperand(0); 2319221345Sdim SDValue N1 = Node->getOperand(1); 2320221345Sdim 2321221345Sdim if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse()) 2322221345Sdim break; 2323221345Sdim 2324221345Sdim // i8 is unshrinkable, i16 should be promoted to i32. 2325221345Sdim if (NVT != MVT::i32 && NVT != MVT::i64) 2326221345Sdim break; 2327221345Sdim 2328221345Sdim ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); 2329221345Sdim ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 2330221345Sdim if (!Cst || !ShlCst) 2331221345Sdim break; 2332221345Sdim 2333221345Sdim int64_t Val = Cst->getSExtValue(); 2334221345Sdim uint64_t ShlVal = ShlCst->getZExtValue(); 2335221345Sdim 2336221345Sdim // Make sure that we don't change the operation by removing bits. 2337221345Sdim // This only matters for OR and XOR, AND is unaffected. 
2338243830Sdim uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1; 2339243830Sdim if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) 2340221345Sdim break; 2341221345Sdim 2342288943Sdim unsigned ShlOp, AddOp, Op; 2343261991Sdim MVT CstVT = NVT; 2344221345Sdim 2345221345Sdim // Check the minimum bitwidth for the new constant. 2346221345Sdim // TODO: AND32ri is the same as AND64ri32 with zext imm. 2347221345Sdim // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr 2348221345Sdim // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. 2349221345Sdim if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal)) 2350221345Sdim CstVT = MVT::i8; 2351221345Sdim else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal)) 2352221345Sdim CstVT = MVT::i32; 2353221345Sdim 2354221345Sdim // Bail if there is no smaller encoding. 2355221345Sdim if (NVT == CstVT) 2356221345Sdim break; 2357221345Sdim 2358261991Sdim switch (NVT.SimpleTy) { 2359221345Sdim default: llvm_unreachable("Unsupported VT!"); 2360221345Sdim case MVT::i32: 2361221345Sdim assert(CstVT == MVT::i8); 2362221345Sdim ShlOp = X86::SHL32ri; 2363288943Sdim AddOp = X86::ADD32rr; 2364221345Sdim 2365221345Sdim switch (Opcode) { 2366239462Sdim default: llvm_unreachable("Impossible opcode"); 2367221345Sdim case ISD::AND: Op = X86::AND32ri8; break; 2368221345Sdim case ISD::OR: Op = X86::OR32ri8; break; 2369221345Sdim case ISD::XOR: Op = X86::XOR32ri8; break; 2370221345Sdim } 2371221345Sdim break; 2372221345Sdim case MVT::i64: 2373221345Sdim assert(CstVT == MVT::i8 || CstVT == MVT::i32); 2374221345Sdim ShlOp = X86::SHL64ri; 2375288943Sdim AddOp = X86::ADD64rr; 2376221345Sdim 2377221345Sdim switch (Opcode) { 2378239462Sdim default: llvm_unreachable("Impossible opcode"); 2379221345Sdim case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; 2380221345Sdim case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; 2381221345Sdim case ISD::XOR: Op = CstVT==MVT::i8? 
X86::XOR64ri8 : X86::XOR64ri32; break; 2382221345Sdim } 2383221345Sdim break; 2384221345Sdim } 2385221345Sdim 2386221345Sdim // Emit the smaller op and the shift. 2387288943Sdim SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT); 2388221345Sdim SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); 2389288943Sdim if (ShlVal == 1) 2390288943Sdim return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), 2391288943Sdim SDValue(New, 0)); 2392221345Sdim return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), 2393288943Sdim getI8Imm(ShlVal, dl)); 2394221345Sdim } 2395280031Sdim case X86ISD::UMUL8: 2396280031Sdim case X86ISD::SMUL8: { 2397280031Sdim SDValue N0 = Node->getOperand(0); 2398280031Sdim SDValue N1 = Node->getOperand(1); 2399280031Sdim 2400280031Sdim Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r); 2401280031Sdim 2402280031Sdim SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL, 2403280031Sdim N0, SDValue()).getValue(1); 2404280031Sdim 2405280031Sdim SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32); 2406280031Sdim SDValue Ops[] = {N1, InFlag}; 2407280031Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2408280031Sdim 2409280031Sdim ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); 2410280031Sdim ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); 2411280031Sdim return nullptr; 2412280031Sdim } 2413280031Sdim 2414218893Sdim case X86ISD::UMUL: { 2415218893Sdim SDValue N0 = Node->getOperand(0); 2416218893Sdim SDValue N1 = Node->getOperand(1); 2417239462Sdim 2418218893Sdim unsigned LoReg; 2419261991Sdim switch (NVT.SimpleTy) { 2420218893Sdim default: llvm_unreachable("Unsupported VT!"); 2421218893Sdim case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; 2422218893Sdim case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; 2423218893Sdim case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; 2424218893Sdim case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; 
break; 2425218893Sdim } 2426239462Sdim 2427218893Sdim SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 2428218893Sdim N0, SDValue()).getValue(1); 2429239462Sdim 2430218893Sdim SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); 2431218893Sdim SDValue Ops[] = {N1, InFlag}; 2432251662Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2433239462Sdim 2434218893Sdim ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); 2435218893Sdim ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); 2436218893Sdim ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); 2437276479Sdim return nullptr; 2438218893Sdim } 2439239462Sdim 2440198090Srdivacky case ISD::SMUL_LOHI: 2441198090Srdivacky case ISD::UMUL_LOHI: { 2442198090Srdivacky SDValue N0 = Node->getOperand(0); 2443198090Srdivacky SDValue N1 = Node->getOperand(1); 2444193323Sed 2445198090Srdivacky bool isSigned = Opcode == ISD::SMUL_LOHI; 2446243830Sdim bool hasBMI2 = Subtarget->hasBMI2(); 2447198090Srdivacky if (!isSigned) { 2448261991Sdim switch (NVT.SimpleTy) { 2449198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2450198090Srdivacky case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 2451198090Srdivacky case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 2452243830Sdim case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; 2453243830Sdim MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; 2454243830Sdim case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; 2455243830Sdim MOpc = hasBMI2 ? 
X86::MULX64rm : X86::MUL64m; break; 2456193323Sed } 2457198090Srdivacky } else { 2458261991Sdim switch (NVT.SimpleTy) { 2459198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2460198090Srdivacky case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 2461198090Srdivacky case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 2462198090Srdivacky case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 2463198090Srdivacky case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 2464193323Sed } 2465198090Srdivacky } 2466193323Sed 2467243830Sdim unsigned SrcReg, LoReg, HiReg; 2468243830Sdim switch (Opc) { 2469243830Sdim default: llvm_unreachable("Unknown MUL opcode!"); 2470243830Sdim case X86::IMUL8r: 2471243830Sdim case X86::MUL8r: 2472243830Sdim SrcReg = LoReg = X86::AL; HiReg = X86::AH; 2473243830Sdim break; 2474243830Sdim case X86::IMUL16r: 2475243830Sdim case X86::MUL16r: 2476243830Sdim SrcReg = LoReg = X86::AX; HiReg = X86::DX; 2477243830Sdim break; 2478243830Sdim case X86::IMUL32r: 2479243830Sdim case X86::MUL32r: 2480243830Sdim SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; 2481243830Sdim break; 2482243830Sdim case X86::IMUL64r: 2483243830Sdim case X86::MUL64r: 2484243830Sdim SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; 2485243830Sdim break; 2486243830Sdim case X86::MULX32rr: 2487243830Sdim SrcReg = X86::EDX; LoReg = HiReg = 0; 2488243830Sdim break; 2489243830Sdim case X86::MULX64rr: 2490243830Sdim SrcReg = X86::RDX; LoReg = HiReg = 0; 2491243830Sdim break; 2492198090Srdivacky } 2493193323Sed 2494198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 2495296417Sdim bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 2496198090Srdivacky // Multiply is commmutative. 
2497198090Srdivacky if (!foldedLoad) { 2498296417Sdim foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 2499198090Srdivacky if (foldedLoad) 2500198090Srdivacky std::swap(N0, N1); 2501198090Srdivacky } 2502193323Sed 2503243830Sdim SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, 2504239462Sdim N0, SDValue()).getValue(1); 2505243830Sdim SDValue ResHi, ResLo; 2506198090Srdivacky 2507198090Srdivacky if (foldedLoad) { 2508243830Sdim SDValue Chain; 2509198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 2510198090Srdivacky InFlag }; 2511243830Sdim if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { 2512243830Sdim SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); 2513251662Sdim SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); 2514243830Sdim ResHi = SDValue(CNode, 0); 2515243830Sdim ResLo = SDValue(CNode, 1); 2516243830Sdim Chain = SDValue(CNode, 2); 2517243830Sdim InFlag = SDValue(CNode, 3); 2518243830Sdim } else { 2519243830Sdim SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 2520251662Sdim SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); 2521243830Sdim Chain = SDValue(CNode, 0); 2522243830Sdim InFlag = SDValue(CNode, 1); 2523243830Sdim } 2524218893Sdim 2525198090Srdivacky // Update the chain. 
2526243830Sdim ReplaceUses(N1.getValue(1), Chain); 2527198090Srdivacky } else { 2528243830Sdim SDValue Ops[] = { N1, InFlag }; 2529243830Sdim if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { 2530243830Sdim SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); 2531251662Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2532243830Sdim ResHi = SDValue(CNode, 0); 2533243830Sdim ResLo = SDValue(CNode, 1); 2534243830Sdim InFlag = SDValue(CNode, 2); 2535243830Sdim } else { 2536243830Sdim SDVTList VTs = CurDAG->getVTList(MVT::Glue); 2537251662Sdim SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 2538243830Sdim InFlag = SDValue(CNode, 0); 2539243830Sdim } 2540198090Srdivacky } 2541198090Srdivacky 2542210299Sed // Prevent use of AH in a REX instruction by referencing AX instead. 2543210299Sed if (HiReg == X86::AH && Subtarget->is64Bit() && 2544210299Sed !SDValue(Node, 1).use_empty()) { 2545210299Sed SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2546210299Sed X86::AX, MVT::i16, InFlag); 2547210299Sed InFlag = Result.getValue(2); 2548210299Sed // Get the low part if needed. Don't use getCopyFromReg for aliasing 2549210299Sed // registers. 2550210299Sed if (!SDValue(Node, 0).use_empty()) 2551210299Sed ReplaceUses(SDValue(Node, 1), 2552210299Sed CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 2553210299Sed 2554210299Sed // Shift AX down 8 bits. 2555210299Sed Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 2556210299Sed Result, 2557288943Sdim CurDAG->getTargetConstant(8, dl, MVT::i8)), 2558288943Sdim 0); 2559210299Sed // Then truncate it down to i8. 2560210299Sed ReplaceUses(SDValue(Node, 1), 2561210299Sed CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 2562210299Sed } 2563198090Srdivacky // Copy the low half of the result, if it is needed. 
2564202375Srdivacky if (!SDValue(Node, 0).use_empty()) { 2565276479Sdim if (!ResLo.getNode()) { 2566243830Sdim assert(LoReg && "Register for low half is not defined!"); 2567243830Sdim ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, 2568243830Sdim InFlag); 2569243830Sdim InFlag = ResLo.getValue(2); 2570243830Sdim } 2571243830Sdim ReplaceUses(SDValue(Node, 0), ResLo); 2572243830Sdim DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); 2573198090Srdivacky } 2574198090Srdivacky // Copy the high half of the result, if it is needed. 2575202375Srdivacky if (!SDValue(Node, 1).use_empty()) { 2576276479Sdim if (!ResHi.getNode()) { 2577243830Sdim assert(HiReg && "Register for high half is not defined!"); 2578243830Sdim ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, 2579243830Sdim InFlag); 2580243830Sdim InFlag = ResHi.getValue(2); 2581243830Sdim } 2582243830Sdim ReplaceUses(SDValue(Node, 1), ResHi); 2583243830Sdim DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); 2584198090Srdivacky } 2585239462Sdim 2586276479Sdim return nullptr; 2587198090Srdivacky } 2588193323Sed 2589198090Srdivacky case ISD::SDIVREM: 2590280031Sdim case ISD::UDIVREM: 2591280031Sdim case X86ISD::SDIVREM8_SEXT_HREG: 2592280031Sdim case X86ISD::UDIVREM8_ZEXT_HREG: { 2593198090Srdivacky SDValue N0 = Node->getOperand(0); 2594198090Srdivacky SDValue N1 = Node->getOperand(1); 2595193323Sed 2596280031Sdim bool isSigned = (Opcode == ISD::SDIVREM || 2597280031Sdim Opcode == X86ISD::SDIVREM8_SEXT_HREG); 2598198090Srdivacky if (!isSigned) { 2599261991Sdim switch (NVT.SimpleTy) { 2600198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2601198090Srdivacky case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 2602198090Srdivacky case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 2603198090Srdivacky case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 2604198090Srdivacky case MVT::i64: Opc = X86::DIV64r; 
MOpc = X86::DIV64m; break; 2605193323Sed } 2606198090Srdivacky } else { 2607261991Sdim switch (NVT.SimpleTy) { 2608198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2609198090Srdivacky case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 2610198090Srdivacky case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 2611198090Srdivacky case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 2612198090Srdivacky case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 2613198090Srdivacky } 2614198090Srdivacky } 2615193323Sed 2616201360Srdivacky unsigned LoReg, HiReg, ClrReg; 2617261991Sdim unsigned SExtOpcode; 2618261991Sdim switch (NVT.SimpleTy) { 2619198090Srdivacky default: llvm_unreachable("Unsupported VT!"); 2620198090Srdivacky case MVT::i8: 2621201360Srdivacky LoReg = X86::AL; ClrReg = HiReg = X86::AH; 2622198090Srdivacky SExtOpcode = X86::CBW; 2623198090Srdivacky break; 2624198090Srdivacky case MVT::i16: 2625198090Srdivacky LoReg = X86::AX; HiReg = X86::DX; 2626261991Sdim ClrReg = X86::DX; 2627198090Srdivacky SExtOpcode = X86::CWD; 2628198090Srdivacky break; 2629198090Srdivacky case MVT::i32: 2630201360Srdivacky LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; 2631198090Srdivacky SExtOpcode = X86::CDQ; 2632198090Srdivacky break; 2633198090Srdivacky case MVT::i64: 2634201360Srdivacky LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; 2635198090Srdivacky SExtOpcode = X86::CQO; 2636198090Srdivacky break; 2637198090Srdivacky } 2638193323Sed 2639198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 2640296417Sdim bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 2641198090Srdivacky bool signBitIsZero = CurDAG->SignBitIsZero(N0); 2642198090Srdivacky 2643198090Srdivacky SDValue InFlag; 2644198090Srdivacky if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { 2645198090Srdivacky // Special case for div8, just use a move with zero extension to AX to 2646198090Srdivacky // clear the upper 8 bits (AH). 
2647198090Srdivacky SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; 2648296417Sdim if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { 2649198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; 2650198090Srdivacky Move = 2651223017Sdim SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, 2652251662Sdim MVT::Other, Ops), 0); 2653198090Srdivacky Chain = Move.getValue(1); 2654198090Srdivacky ReplaceUses(N0.getValue(1), Chain); 2655193323Sed } else { 2656198090Srdivacky Move = 2657223017Sdim SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0); 2658198090Srdivacky Chain = CurDAG->getEntryNode(); 2659198090Srdivacky } 2660223017Sdim Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue()); 2661198090Srdivacky InFlag = Chain.getValue(1); 2662198090Srdivacky } else { 2663198090Srdivacky InFlag = 2664198090Srdivacky CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, 2665198090Srdivacky LoReg, N0, SDValue()).getValue(1); 2666198090Srdivacky if (isSigned && !signBitIsZero) { 2667198090Srdivacky // Sign extend the low part into the high part. 2668193323Sed InFlag = 2669218893Sdim SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); 2670198090Srdivacky } else { 2671198090Srdivacky // Zero out the high part, effectively zero extending the input. 
2672280031Sdim SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); 2673261991Sdim switch (NVT.SimpleTy) { 2674261991Sdim case MVT::i16: 2675261991Sdim ClrNode = 2676261991Sdim SDValue(CurDAG->getMachineNode( 2677261991Sdim TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, 2678288943Sdim CurDAG->getTargetConstant(X86::sub_16bit, dl, 2679288943Sdim MVT::i32)), 2680261991Sdim 0); 2681261991Sdim break; 2682261991Sdim case MVT::i32: 2683261991Sdim break; 2684261991Sdim case MVT::i64: 2685261991Sdim ClrNode = 2686261991Sdim SDValue(CurDAG->getMachineNode( 2687261991Sdim TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, 2688288943Sdim CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, 2689288943Sdim CurDAG->getTargetConstant(X86::sub_32bit, dl, 2690288943Sdim MVT::i32)), 2691261991Sdim 0); 2692261991Sdim break; 2693261991Sdim default: 2694261991Sdim llvm_unreachable("Unexpected division source"); 2695261991Sdim } 2696261991Sdim 2697201360Srdivacky InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, 2698198090Srdivacky ClrNode, InFlag).getValue(1); 2699193323Sed } 2700198090Srdivacky } 2701193323Sed 2702198090Srdivacky if (foldedLoad) { 2703198090Srdivacky SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 2704198090Srdivacky InFlag }; 2705198090Srdivacky SDNode *CNode = 2706251662Sdim CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); 2707198090Srdivacky InFlag = SDValue(CNode, 1); 2708198090Srdivacky // Update the chain. 2709198090Srdivacky ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 2710198090Srdivacky } else { 2711198090Srdivacky InFlag = 2712218893Sdim SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); 2713198090Srdivacky } 2714198090Srdivacky 2715280031Sdim // Prevent use of AH in a REX instruction by explicitly copying it to 2716280031Sdim // an ABCD_L register. 
2717261991Sdim // 2718261991Sdim // The current assumption of the register allocator is that isel 2719280031Sdim // won't generate explicit references to the GR8_ABCD_H registers. If 2720261991Sdim // the allocator and/or the backend get enhanced to be more robust in 2721261991Sdim // that regard, this can be, and should be, removed. 2722280031Sdim if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { 2723280031Sdim SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); 2724280031Sdim unsigned AHExtOpcode = 2725280031Sdim isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8; 2726210299Sed 2727280031Sdim SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, 2728280031Sdim MVT::Glue, AHCopy, InFlag); 2729280031Sdim SDValue Result(RNode, 0); 2730280031Sdim InFlag = SDValue(RNode, 1); 2731210299Sed 2732280031Sdim if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG || 2733280031Sdim Opcode == X86ISD::SDIVREM8_SEXT_HREG) { 2734280031Sdim if (Node->getValueType(1) == MVT::i64) { 2735280031Sdim // It's not possible to directly movsx AH to a 64bit register, because 2736280031Sdim // the latter needs the REX prefix, but the former can't have it. 2737280031Sdim assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG && 2738280031Sdim "Unexpected i64 sext of h-register"); 2739280031Sdim Result = 2740280031Sdim SDValue(CurDAG->getMachineNode( 2741280031Sdim TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, 2742288943Sdim CurDAG->getTargetConstant(0, dl, MVT::i64), Result, 2743288943Sdim CurDAG->getTargetConstant(X86::sub_32bit, dl, 2744288943Sdim MVT::i32)), 2745280031Sdim 0); 2746280031Sdim } 2747280031Sdim } else { 2748280031Sdim Result = 2749280031Sdim CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); 2750280031Sdim } 2751280031Sdim ReplaceUses(SDValue(Node, 1), Result); 2752280031Sdim DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 2753210299Sed } 2754198090Srdivacky // Copy the division (low) result, if it is needed. 
2755202375Srdivacky if (!SDValue(Node, 0).use_empty()) { 2756198090Srdivacky SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2757198090Srdivacky LoReg, NVT, InFlag); 2758198090Srdivacky InFlag = Result.getValue(2); 2759202375Srdivacky ReplaceUses(SDValue(Node, 0), Result); 2760204642Srdivacky DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 2761198090Srdivacky } 2762198090Srdivacky // Copy the remainder (high) result, if it is needed. 2763202375Srdivacky if (!SDValue(Node, 1).use_empty()) { 2764210299Sed SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2765210299Sed HiReg, NVT, InFlag); 2766210299Sed InFlag = Result.getValue(2); 2767202375Srdivacky ReplaceUses(SDValue(Node, 1), Result); 2768204642Srdivacky DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 2769198090Srdivacky } 2770276479Sdim return nullptr; 2771198090Srdivacky } 2772193323Sed 2773239462Sdim case X86ISD::CMP: 2774239462Sdim case X86ISD::SUB: { 2775239462Sdim // Sometimes a SUB is used to perform comparison. 2776239462Sdim if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) 2777239462Sdim // This node is not a CMP. 2778239462Sdim break; 2779198090Srdivacky SDValue N0 = Node->getOperand(0); 2780198090Srdivacky SDValue N1 = Node->getOperand(1); 2781198090Srdivacky 2782280031Sdim if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && 2783296417Sdim hasNoSignedComparisonUses(Node)) 2784288943Sdim N0 = N0.getOperand(0); 2785280031Sdim 2786198090Srdivacky // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to 2787198090Srdivacky // use a smaller encoding. 2788280031Sdim // Look past the truncate if CMP is the only use of it. 
2789234353Sdim if ((N0.getNode()->getOpcode() == ISD::AND || 2790234353Sdim (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && 2791234353Sdim N0.getNode()->hasOneUse() && 2792198090Srdivacky N0.getValueType() != MVT::i8 && 2793198090Srdivacky X86::isZeroNode(N1)) { 2794198090Srdivacky ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1)); 2795198090Srdivacky if (!C) break; 2796198090Srdivacky 2797198090Srdivacky // For example, convert "testl %eax, $8" to "testb %al, $8" 2798198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && 2799198090Srdivacky (!(C->getZExtValue() & 0x80) || 2800296417Sdim hasNoSignedComparisonUses(Node))) { 2801288943Sdim SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8); 2802198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2803198090Srdivacky 2804198090Srdivacky // On x86-32, only the ABCD registers have 8-bit subregisters. 2805198090Srdivacky if (!Subtarget->is64Bit()) { 2806234353Sdim const TargetRegisterClass *TRC; 2807261991Sdim switch (N0.getSimpleValueType().SimpleTy) { 2808198090Srdivacky case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2809198090Srdivacky case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2810198090Srdivacky default: llvm_unreachable("Unsupported TEST operand type!"); 2811198090Srdivacky } 2812288943Sdim SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); 2813198090Srdivacky Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2814198090Srdivacky Reg.getValueType(), Reg, RC), 0); 2815198090Srdivacky } 2816198090Srdivacky 2817198090Srdivacky // Extract the l-register. 2818208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, 2819198090Srdivacky MVT::i8, Reg); 2820198090Srdivacky 2821198090Srdivacky // Emit a testb. 
2822243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, 2823243830Sdim Subreg, Imm); 2824243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2825243830Sdim // one, do not call ReplaceAllUsesWith. 2826243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2827243830Sdim SDValue(NewNode, 0)); 2828276479Sdim return nullptr; 2829193323Sed } 2830198090Srdivacky 2831198090Srdivacky // For example, "testl %eax, $2048" to "testb %ah, $8". 2832198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && 2833198090Srdivacky (!(C->getZExtValue() & 0x8000) || 2834296417Sdim hasNoSignedComparisonUses(Node))) { 2835198090Srdivacky // Shift the immediate right by 8 bits. 2836198090Srdivacky SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, 2837288943Sdim dl, MVT::i8); 2838198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2839198090Srdivacky 2840198090Srdivacky // Put the value in an ABCD register. 2841234353Sdim const TargetRegisterClass *TRC; 2842261991Sdim switch (N0.getSimpleValueType().SimpleTy) { 2843198090Srdivacky case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; 2844198090Srdivacky case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2845198090Srdivacky case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2846198090Srdivacky default: llvm_unreachable("Unsupported TEST operand type!"); 2847198090Srdivacky } 2848288943Sdim SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); 2849198090Srdivacky Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2850198090Srdivacky Reg.getValueType(), Reg, RC), 0); 2851198090Srdivacky 2852198090Srdivacky // Extract the h-register. 2853208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, 2854198090Srdivacky MVT::i8, Reg); 2855198090Srdivacky 2856226633Sdim // Emit a testb. 
The EXTRACT_SUBREG becomes a COPY that can only 2857226633Sdim // target GR8_NOREX registers, so make sure the register class is 2858226633Sdim // forced. 2859243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, 2860243830Sdim MVT::i32, Subreg, ShiftedImm); 2861243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2862243830Sdim // one, do not call ReplaceAllUsesWith. 2863243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2864243830Sdim SDValue(NewNode, 0)); 2865276479Sdim return nullptr; 2866193323Sed } 2867198090Srdivacky 2868198090Srdivacky // For example, "testl %eax, $32776" to "testw %ax, $32776". 2869198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && 2870198090Srdivacky N0.getValueType() != MVT::i16 && 2871198090Srdivacky (!(C->getZExtValue() & 0x8000) || 2872296417Sdim hasNoSignedComparisonUses(Node))) { 2873288943Sdim SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, 2874288943Sdim MVT::i16); 2875198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2876198090Srdivacky 2877198090Srdivacky // Extract the 16-bit subregister. 2878208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, 2879198090Srdivacky MVT::i16, Reg); 2880198090Srdivacky 2881198090Srdivacky // Emit a testw. 2882243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, 2883243830Sdim Subreg, Imm); 2884243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2885243830Sdim // one, do not call ReplaceAllUsesWith. 2886243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2887243830Sdim SDValue(NewNode, 0)); 2888276479Sdim return nullptr; 2889193323Sed } 2890198090Srdivacky 2891198090Srdivacky // For example, "testq %rax, $268468232" to "testl %eax, $268468232". 
2892198090Srdivacky if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && 2893198090Srdivacky N0.getValueType() == MVT::i64 && 2894198090Srdivacky (!(C->getZExtValue() & 0x80000000) || 2895296417Sdim hasNoSignedComparisonUses(Node))) { 2896288943Sdim SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, 2897288943Sdim MVT::i32); 2898198090Srdivacky SDValue Reg = N0.getNode()->getOperand(0); 2899198090Srdivacky 2900198090Srdivacky // Extract the 32-bit subregister. 2901208599Srdivacky SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, 2902198090Srdivacky MVT::i32, Reg); 2903198090Srdivacky 2904198090Srdivacky // Emit a testl. 2905243830Sdim SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, 2906243830Sdim Subreg, Imm); 2907243830Sdim // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 2908243830Sdim // one, do not call ReplaceAllUsesWith. 2909243830Sdim ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 2910243830Sdim SDValue(NewNode, 0)); 2911276479Sdim return nullptr; 2912198090Srdivacky } 2913193323Sed } 2914198090Srdivacky break; 2915193323Sed } 2916234353Sdim case ISD::STORE: { 2917234353Sdim // Change a chain of {load; incr or dec; store} of the same value into 2918234353Sdim // a simple increment or decrement through memory of that value, if the 2919234353Sdim // uses of the modified value and its address are suitable. 2920234353Sdim // The DEC64m tablegen pattern is currently not able to match the case where 2921239462Sdim // the EFLAGS on the original DEC are used. (This also applies to 2922234353Sdim // {INC,DEC}X{64,32,16,8}.) 2923234353Sdim // We'll need to improve tablegen to allow flags to be transferred from a 2924234353Sdim // node in the pattern to the result node. 
probably with a new keyword 2925234353Sdim // for example, we have this 2926234353Sdim // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", 2927234353Sdim // [(store (add (loadi64 addr:$dst), -1), addr:$dst), 2928234353Sdim // (implicit EFLAGS)]>; 2929234353Sdim // but maybe need something like this 2930234353Sdim // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", 2931234353Sdim // [(store (add (loadi64 addr:$dst), -1), addr:$dst), 2932234353Sdim // (transferrable EFLAGS)]>; 2933234353Sdim 2934234353Sdim StoreSDNode *StoreNode = cast<StoreSDNode>(Node); 2935234353Sdim SDValue StoredVal = StoreNode->getOperand(1); 2936234353Sdim unsigned Opc = StoredVal->getOpcode(); 2937234353Sdim 2938276479Sdim LoadSDNode *LoadNode = nullptr; 2939234353Sdim SDValue InputChain; 2940234353Sdim if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, 2941234353Sdim LoadNode, InputChain)) 2942234353Sdim break; 2943234353Sdim 2944234353Sdim SDValue Base, Scale, Index, Disp, Segment; 2945296417Sdim if (!selectAddr(LoadNode, LoadNode->getBasePtr(), 2946234353Sdim Base, Scale, Index, Disp, Segment)) 2947234353Sdim break; 2948234353Sdim 2949234353Sdim MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2); 2950234353Sdim MemOp[0] = StoreNode->getMemOperand(); 2951234353Sdim MemOp[1] = LoadNode->getMemOperand(); 2952234353Sdim const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; 2953239462Sdim EVT LdVT = LoadNode->getMemoryVT(); 2954234353Sdim unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); 2955234353Sdim MachineSDNode *Result = CurDAG->getMachineNode(newOpc, 2956261991Sdim SDLoc(Node), 2957251662Sdim MVT::i32, MVT::Other, Ops); 2958234353Sdim Result->setMemRefs(MemOp, MemOp + 2); 2959234353Sdim 2960234353Sdim ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); 2961234353Sdim ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); 2962234353Sdim 2963234353Sdim return Result; 2964198090Srdivacky } 
2965234353Sdim }
2966193323Sed
 // None of the custom cases above produced a replacement; defer to the
 // common matcher entry point (SelectCode) for this node.
2967202375Srdivacky SDNode *ResNode = SelectCode(Node);
2968193323Sed
 // Debug-only dump of the selection result: print the original node when
 // nothing was replaced (ResNode is null or Node itself), otherwise the
 // newly created node.
2969204642Srdivacky DEBUG(dbgs() << "=> ";
2970276479Sdim if (ResNode == nullptr || ResNode == Node)
2971204642Srdivacky Node->dump(CurDAG);
2972204642Srdivacky else
2973204642Srdivacky ResNode->dump(CurDAG);
2974204642Srdivacky dbgs() << '\n');
2975193323Sed
2976193323Sed return ResNode;
2977193323Sed}
2978193323Sed
 // Resolve an inline-asm memory operand: run the address matcher on Op and,
 // on success, append the matched X86 address components to OutOps. The five
 // values pushed correspond to base, scale, index, displacement and segment
 // (the same operand order used by the other selectAddr caller in this
 // file). Returns true if the operand could not be matched (failure), false
 // once the five address operands have been emitted.
2979193323Sedbool X86DAGToDAGISel::
2980288943SdimSelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
2981193323Sed std::vector<SDValue> &OutOps) {
2982193323Sed SDValue Op0, Op1, Op2, Op3, Op4;
2983288943Sdim switch (ConstraintID) {
2984288943Sdim default:
2985288943Sdim llvm_unreachable("Unexpected asm memory constraint");
2986288943Sdim case InlineAsm::Constraint_i:
2987288943Sdim // FIXME: It seems strange that 'i' is needed here since it's supposed to
2988288943Sdim // be an immediate and not a memory constraint.
2989288943Sdim // Fallthrough.
2990288943Sdim case InlineAsm::Constraint_o: // offsetable ??
2991288943Sdim case InlineAsm::Constraint_v: // not offsetable ??
2992288943Sdim case InlineAsm::Constraint_m: // memory
2993288943Sdim case InlineAsm::Constraint_X:
 // selectAddr failing means we cannot express Op as an X86 address;
 // report failure to the caller.
2994296417Sdim if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
2995193323Sed return true;
2996193323Sed break;
2997193323Sed }
2998239462Sdim
 // Emit the matched address as five operands in the canonical order.
2999193323Sed OutOps.push_back(Op0);
3000193323Sed OutOps.push_back(Op1);
3001193323Sed OutOps.push_back(Op2);
3002193323Sed OutOps.push_back(Op3);
3003193323Sed OutOps.push_back(Op4);
3004193323Sed return false;
3005193323Sed}
3006193323Sed
3007296417Sdim/// This pass converts a legalized DAG into a X86-specific DAG,
3008296417Sdim/// ready for instruction scheduling.
 /// Factory entry point: constructs the X86 instruction-selection pass for
 /// the given target machine at the requested optimization level.
3009193323SedFunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3010234353Sdim CodeGenOpt::Level OptLevel) { 3011193323Sed return new X86DAGToDAGISel(TM, OptLevel); 3012193323Sed} 3013