1207618Srdivacky//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// 2207618Srdivacky// 3207618Srdivacky// The LLVM Compiler Infrastructure 4207618Srdivacky// 5207618Srdivacky// This file is distributed under the University of Illinois Open Source 6207618Srdivacky// License. See LICENSE.TXT for details. 7207618Srdivacky// 8207618Srdivacky//===----------------------------------------------------------------------===// 9207618Srdivacky// 10207618Srdivacky// This file implements the X86SelectionDAGInfo class. 11207618Srdivacky// 12207618Srdivacky//===----------------------------------------------------------------------===// 13207618Srdivacky 14276479Sdim#include "X86InstrInfo.h" 15276479Sdim#include "X86ISelLowering.h" 16276479Sdim#include "X86RegisterInfo.h" 17276479Sdim#include "X86Subtarget.h" 18276479Sdim#include "X86SelectionDAGInfo.h" 19208599Srdivacky#include "llvm/CodeGen/SelectionDAG.h" 20249423Sdim#include "llvm/IR/DerivedTypes.h" 21276479Sdim#include "llvm/Target/TargetLowering.h" 22276479Sdim 23207618Srdivackyusing namespace llvm; 24207618Srdivacky 25276479Sdim#define DEBUG_TYPE "x86-selectiondag-info" 26207618Srdivacky 27280031Sdimbool X86SelectionDAGInfo::isBaseRegConflictPossible( 28280031Sdim SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const { 29280031Sdim // We cannot use TRI->hasBasePointer() until *after* we select all basic 30280031Sdim // blocks. Legalization may introduce new stack temporaries with large 31280031Sdim // alignment requirements. Fall back to generic code if there are any 32280031Sdim // dynamic stack adjustments (hopefully rare) and the base pointer would 33280031Sdim // conflict if we had to use it. 34280031Sdim MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 35288943Sdim if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment()) 36280031Sdim return false; 37280031Sdim 38280031Sdim const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>( 39280031Sdim DAG.getSubtarget().getRegisterInfo()); 40280031Sdim unsigned BaseReg = TRI->getBaseRegister(); 41280031Sdim for (unsigned R : ClobberSet) 42280031Sdim if (BaseReg == R) 43280031Sdim return true; 44280031Sdim return false; 45280031Sdim} 46280031Sdim 47296417SdimSDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( 48296417Sdim SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, 49296417Sdim SDValue Size, unsigned Align, bool isVolatile, 50296417Sdim MachinePointerInfo DstPtrInfo) const { 51208599Srdivacky ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 52288943Sdim const X86Subtarget &Subtarget = 53288943Sdim DAG.getMachineFunction().getSubtarget<X86Subtarget>(); 54208599Srdivacky 55280031Sdim#ifndef NDEBUG 56280031Sdim // If the base register might conflict with our physical registers, bail out. 57288943Sdim const unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI, 58288943Sdim X86::ECX, X86::EAX, X86::EDI}; 59280031Sdim assert(!isBaseRegConflictPossible(DAG, ClobberSet)); 60280031Sdim#endif 61280031Sdim 62218893Sdim // If to a segment-relative address space, use the default lowering. 63218893Sdim if (DstPtrInfo.getAddrSpace() >= 256) 64218893Sdim return SDValue(); 65239462Sdim 66208599Srdivacky // If not DWORD aligned or size is more than the threshold, call the library. 67208599Srdivacky // The libc version is likely to be faster for these cases. It can use the 68208599Srdivacky // address value and run time information about the CPU. 69276479Sdim if ((Align & 3) != 0 || !ConstantSize || 70276479Sdim ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { 71208599Srdivacky // Check to see if there is a specialized entry-point for memory zeroing. 72208599Srdivacky ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 73208599Srdivacky 74296417Sdim if (const char *bzeroEntry = V && 75276479Sdim V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { 76296417Sdim const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 77296417Sdim EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); 78288943Sdim Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 79208599Srdivacky TargetLowering::ArgListTy Args; 80208599Srdivacky TargetLowering::ArgListEntry Entry; 81208599Srdivacky Entry.Node = Dst; 82208599Srdivacky Entry.Ty = IntPtrTy; 83208599Srdivacky Args.push_back(Entry); 84208599Srdivacky Entry.Node = Size; 85208599Srdivacky Args.push_back(Entry); 86276479Sdim 87276479Sdim TargetLowering::CallLoweringInfo CLI(DAG); 88276479Sdim CLI.setDebugLoc(dl).setChain(Chain) 89276479Sdim .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 90276479Sdim DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 91276479Sdim 0) 92276479Sdim .setDiscardResult(); 93276479Sdim 94296417Sdim std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); 95208599Srdivacky return CallResult.second; 96208599Srdivacky } 97208599Srdivacky 98208599Srdivacky // Otherwise have the target-independent code call memset. 99208599Srdivacky return SDValue(); 100208599Srdivacky } 101208599Srdivacky 102208599Srdivacky uint64_t SizeVal = ConstantSize->getZExtValue(); 103276479Sdim SDValue InFlag; 104208599Srdivacky EVT AVT; 105208599Srdivacky SDValue Count; 106208599Srdivacky ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 107208599Srdivacky unsigned BytesLeft = 0; 108208599Srdivacky bool TwoRepStos = false; 109208599Srdivacky if (ValC) { 110208599Srdivacky unsigned ValReg; 111208599Srdivacky uint64_t Val = ValC->getZExtValue() & 255; 112208599Srdivacky 113208599Srdivacky // If the value is a constant, then we can potentially use larger sets. 114208599Srdivacky switch (Align & 3) { 115208599Srdivacky case 2: // WORD aligned 116208599Srdivacky AVT = MVT::i16; 117208599Srdivacky ValReg = X86::AX; 118208599Srdivacky Val = (Val << 8) | Val; 119208599Srdivacky break; 120208599Srdivacky case 0: // DWORD aligned 121208599Srdivacky AVT = MVT::i32; 122208599Srdivacky ValReg = X86::EAX; 123208599Srdivacky Val = (Val << 8) | Val; 124208599Srdivacky Val = (Val << 16) | Val; 125276479Sdim if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned 126208599Srdivacky AVT = MVT::i64; 127208599Srdivacky ValReg = X86::RAX; 128208599Srdivacky Val = (Val << 32) | Val; 129208599Srdivacky } 130208599Srdivacky break; 131208599Srdivacky default: // Byte aligned 132208599Srdivacky AVT = MVT::i8; 133208599Srdivacky ValReg = X86::AL; 134288943Sdim Count = DAG.getIntPtrConstant(SizeVal, dl); 135208599Srdivacky break; 136208599Srdivacky } 137208599Srdivacky 138208599Srdivacky if (AVT.bitsGT(MVT::i8)) { 139208599Srdivacky unsigned UBytes = AVT.getSizeInBits() / 8; 140288943Sdim Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl); 141208599Srdivacky BytesLeft = SizeVal % UBytes; 142208599Srdivacky } 143208599Srdivacky 144296417Sdim Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), 145296417Sdim InFlag); 146208599Srdivacky InFlag = Chain.getValue(1); 147208599Srdivacky } else { 148208599Srdivacky AVT = MVT::i8; 149288943Sdim Count = DAG.getIntPtrConstant(SizeVal, dl); 150208599Srdivacky Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); 151208599Srdivacky InFlag = Chain.getValue(1); 152208599Srdivacky } 153208599Srdivacky 154276479Sdim Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, 155276479Sdim Count, InFlag); 156208599Srdivacky InFlag = Chain.getValue(1); 157276479Sdim Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, 158276479Sdim Dst, InFlag); 159208599Srdivacky InFlag = Chain.getValue(1); 160208599Srdivacky 161218893Sdim SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); 162208599Srdivacky SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; 163276479Sdim Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); 164208599Srdivacky 165208599Srdivacky if (TwoRepStos) { 166208599Srdivacky InFlag = Chain.getValue(1); 167208599Srdivacky Count = Size; 168208599Srdivacky EVT CVT = Count.getValueType(); 169208599Srdivacky SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, 170288943Sdim DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, 171288943Sdim CVT)); 172296417Sdim Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 173296417Sdim Left, InFlag); 174208599Srdivacky InFlag = Chain.getValue(1); 175218893Sdim Tys = DAG.getVTList(MVT::Other, MVT::Glue); 176208599Srdivacky SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; 177276479Sdim Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); 178208599Srdivacky } else if (BytesLeft) { 179208599Srdivacky // Handle the last 1 - 7 bytes. 180208599Srdivacky unsigned Offset = SizeVal - BytesLeft; 181208599Srdivacky EVT AddrVT = Dst.getValueType(); 182208599Srdivacky EVT SizeVT = Size.getValueType(); 183208599Srdivacky 184208599Srdivacky Chain = DAG.getMemset(Chain, dl, 185208599Srdivacky DAG.getNode(ISD::ADD, dl, AddrVT, Dst, 186288943Sdim DAG.getConstant(Offset, dl, AddrVT)), 187208599Srdivacky Src, 188288943Sdim DAG.getConstant(BytesLeft, dl, SizeVT), 189288943Sdim Align, isVolatile, false, 190288943Sdim DstPtrInfo.getWithOffset(Offset)); 191208599Srdivacky } 192208599Srdivacky 193208599Srdivacky // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 194208599Srdivacky return Chain; 195208599Srdivacky} 196208599Srdivacky 197288943SdimSDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( 198288943Sdim SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, 199288943Sdim SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, 200288943Sdim MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { 201221345Sdim // This requires the copy size to be a constant, preferably 202208599Srdivacky // within a subtarget-specific limit. 203208599Srdivacky ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 204288943Sdim const X86Subtarget &Subtarget = 205288943Sdim DAG.getMachineFunction().getSubtarget<X86Subtarget>(); 206208599Srdivacky if (!ConstantSize) 207208599Srdivacky return SDValue(); 208208599Srdivacky uint64_t SizeVal = ConstantSize->getZExtValue(); 209276479Sdim if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 210208599Srdivacky return SDValue(); 211208599Srdivacky 212218893Sdim /// If not DWORD aligned, it is more efficient to call the library. However 213218893Sdim /// if calling the library is not allowed (AlwaysInline), then soldier on as 214218893Sdim /// the code generated here is better than the long load-store sequence we 215218893Sdim /// would otherwise get. 216218893Sdim if (!AlwaysInline && (Align & 3) != 0) 217208599Srdivacky return SDValue(); 218208599Srdivacky 219218893Sdim // If to a segment-relative address space, use the default lowering. 220218893Sdim if (DstPtrInfo.getAddrSpace() >= 256 || 221218893Sdim SrcPtrInfo.getAddrSpace() >= 256) 222218893Sdim return SDValue(); 223208599Srdivacky 224280031Sdim // If the base register might conflict with our physical registers, bail out. 225288943Sdim const unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI, 226288943Sdim X86::ECX, X86::ESI, X86::EDI}; 227280031Sdim if (isBaseRegConflictPossible(DAG, ClobberSet)) 228246858Sdim return SDValue(); 229246858Sdim 230218893Sdim MVT AVT; 231218893Sdim if (Align & 1) 232218893Sdim AVT = MVT::i8; 233218893Sdim else if (Align & 2) 234218893Sdim AVT = MVT::i16; 235218893Sdim else if (Align & 4) 236218893Sdim // DWORD aligned 237218893Sdim AVT = MVT::i32; 238218893Sdim else 239218893Sdim // QWORD aligned 240276479Sdim AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; 241218893Sdim 242208599Srdivacky unsigned UBytes = AVT.getSizeInBits() / 8; 243208599Srdivacky unsigned CountVal = SizeVal / UBytes; 244288943Sdim SDValue Count = DAG.getIntPtrConstant(CountVal, dl); 245208599Srdivacky unsigned BytesLeft = SizeVal % UBytes; 246208599Srdivacky 247276479Sdim SDValue InFlag; 248296417Sdim Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, 249296417Sdim Count, InFlag); 250208599Srdivacky InFlag = Chain.getValue(1); 251296417Sdim Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, 252296417Sdim Dst, InFlag); 253208599Srdivacky InFlag = Chain.getValue(1); 254296417Sdim Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, 255296417Sdim Src, InFlag); 256208599Srdivacky InFlag = Chain.getValue(1); 257208599Srdivacky 258218893Sdim SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); 259208599Srdivacky SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; 260276479Sdim SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); 261208599Srdivacky 262208599Srdivacky SmallVector<SDValue, 4> Results; 263208599Srdivacky Results.push_back(RepMovs); 264208599Srdivacky if (BytesLeft) { 265208599Srdivacky // Handle the last 1 - 7 bytes. 266208599Srdivacky unsigned Offset = SizeVal - BytesLeft; 267208599Srdivacky EVT DstVT = Dst.getValueType(); 268208599Srdivacky EVT SrcVT = Src.getValueType(); 269208599Srdivacky EVT SizeVT = Size.getValueType(); 270208599Srdivacky Results.push_back(DAG.getMemcpy(Chain, dl, 271208599Srdivacky DAG.getNode(ISD::ADD, dl, DstVT, Dst, 272288943Sdim DAG.getConstant(Offset, dl, 273288943Sdim DstVT)), 274208599Srdivacky DAG.getNode(ISD::ADD, dl, SrcVT, Src, 275288943Sdim DAG.getConstant(Offset, dl, 276288943Sdim SrcVT)), 277288943Sdim DAG.getConstant(BytesLeft, dl, SizeVT), 278288943Sdim Align, isVolatile, AlwaysInline, false, 279218893Sdim DstPtrInfo.getWithOffset(Offset), 280218893Sdim SrcPtrInfo.getWithOffset(Offset))); 281208599Srdivacky } 282208599Srdivacky 283276479Sdim return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); 284208599Srdivacky} 285