// X86SelectionDAGInfo.cpp revision 263508
1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the X86SelectionDAGInfo class. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "x86-selectiondag-info" 15#include "X86TargetMachine.h" 16#include "llvm/CodeGen/SelectionDAG.h" 17#include "llvm/IR/DerivedTypes.h" 18using namespace llvm; 19 20X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : 21 TargetSelectionDAGInfo(TM), 22 Subtarget(&TM.getSubtarget<X86Subtarget>()), 23 TLI(*TM.getTargetLowering()) { 24} 25 26X86SelectionDAGInfo::~X86SelectionDAGInfo() { 27} 28 29SDValue 30X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 31 SDValue Chain, 32 SDValue Dst, SDValue Src, 33 SDValue Size, unsigned Align, 34 bool isVolatile, 35 MachinePointerInfo DstPtrInfo) const { 36 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 37 38 // If to a segment-relative address space, use the default lowering. 39 if (DstPtrInfo.getAddrSpace() >= 256) 40 return SDValue(); 41 42 // If not DWORD aligned or size is more than the threshold, call the library. 43 // The libc version is likely to be faster for these cases. It can use the 44 // address value and run time information about the CPU. 45 if ((Align & 3) != 0 || 46 !ConstantSize || 47 ConstantSize->getZExtValue() > 48 Subtarget->getMaxInlineSizeThreshold()) { 49 // Check to see if there is a specialized entry-point for memory zeroing. 50 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 51 52 if (const char *bzeroEntry = V && 53 V->isNullValue() ? 
Subtarget->getBZeroEntry() : 0) { 54 EVT IntPtr = TLI.getPointerTy(); 55 Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); 56 TargetLowering::ArgListTy Args; 57 TargetLowering::ArgListEntry Entry; 58 Entry.Node = Dst; 59 Entry.Ty = IntPtrTy; 60 Args.push_back(Entry); 61 Entry.Node = Size; 62 Args.push_back(Entry); 63 TargetLowering:: 64 CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()), 65 false, false, false, false, 66 0, CallingConv::C, /*isTailCall=*/false, 67 /*doesNotRet=*/false, /*isReturnValueUsed=*/false, 68 DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, 69 DAG, dl); 70 std::pair<SDValue,SDValue> CallResult = 71 TLI.LowerCallTo(CLI); 72 return CallResult.second; 73 } 74 75 // Otherwise have the target-independent code call memset. 76 return SDValue(); 77 } 78 79 uint64_t SizeVal = ConstantSize->getZExtValue(); 80 SDValue InFlag(0, 0); 81 EVT AVT; 82 SDValue Count; 83 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 84 unsigned BytesLeft = 0; 85 bool TwoRepStos = false; 86 if (ValC) { 87 unsigned ValReg; 88 uint64_t Val = ValC->getZExtValue() & 255; 89 90 // If the value is a constant, then we can potentially use larger sets. 
91 switch (Align & 3) { 92 case 2: // WORD aligned 93 AVT = MVT::i16; 94 ValReg = X86::AX; 95 Val = (Val << 8) | Val; 96 break; 97 case 0: // DWORD aligned 98 AVT = MVT::i32; 99 ValReg = X86::EAX; 100 Val = (Val << 8) | Val; 101 Val = (Val << 16) | Val; 102 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned 103 AVT = MVT::i64; 104 ValReg = X86::RAX; 105 Val = (Val << 32) | Val; 106 } 107 break; 108 default: // Byte aligned 109 AVT = MVT::i8; 110 ValReg = X86::AL; 111 Count = DAG.getIntPtrConstant(SizeVal); 112 break; 113 } 114 115 if (AVT.bitsGT(MVT::i8)) { 116 unsigned UBytes = AVT.getSizeInBits() / 8; 117 Count = DAG.getIntPtrConstant(SizeVal / UBytes); 118 BytesLeft = SizeVal % UBytes; 119 } 120 121 Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), 122 InFlag); 123 InFlag = Chain.getValue(1); 124 } else { 125 AVT = MVT::i8; 126 Count = DAG.getIntPtrConstant(SizeVal); 127 Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); 128 InFlag = Chain.getValue(1); 129 } 130 131 Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : 132 X86::ECX, 133 Count, InFlag); 134 InFlag = Chain.getValue(1); 135 Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : 136 X86::EDI, 137 Dst, InFlag); 138 InFlag = Chain.getValue(1); 139 140 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); 141 SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; 142 Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); 143 144 if (TwoRepStos) { 145 InFlag = Chain.getValue(1); 146 Count = Size; 147 EVT CVT = Count.getValueType(); 148 SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, 149 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 150 Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? 
X86::RCX : 151 X86::ECX, 152 Left, InFlag); 153 InFlag = Chain.getValue(1); 154 Tys = DAG.getVTList(MVT::Other, MVT::Glue); 155 SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; 156 Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); 157 } else if (BytesLeft) { 158 // Handle the last 1 - 7 bytes. 159 unsigned Offset = SizeVal - BytesLeft; 160 EVT AddrVT = Dst.getValueType(); 161 EVT SizeVT = Size.getValueType(); 162 163 Chain = DAG.getMemset(Chain, dl, 164 DAG.getNode(ISD::ADD, dl, AddrVT, Dst, 165 DAG.getConstant(Offset, AddrVT)), 166 Src, 167 DAG.getConstant(BytesLeft, SizeVT), 168 Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); 169 } 170 171 // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 172 return Chain; 173} 174 175SDValue 176X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, 177 SDValue Chain, SDValue Dst, SDValue Src, 178 SDValue Size, unsigned Align, 179 bool isVolatile, bool AlwaysInline, 180 MachinePointerInfo DstPtrInfo, 181 MachinePointerInfo SrcPtrInfo) const { 182 // This requires the copy size to be a constant, preferably 183 // within a subtarget-specific limit. 184 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 185 if (!ConstantSize) 186 return SDValue(); 187 uint64_t SizeVal = ConstantSize->getZExtValue(); 188 if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) 189 return SDValue(); 190 191 /// If not DWORD aligned, it is more efficient to call the library. However 192 /// if calling the library is not allowed (AlwaysInline), then soldier on as 193 /// the code generated here is better than the long load-store sequence we 194 /// would otherwise get. 195 if (!AlwaysInline && (Align & 3) != 0) 196 return SDValue(); 197 198 // If to a segment-relative address space, use the default lowering. 
199 if (DstPtrInfo.getAddrSpace() >= 256 || 200 SrcPtrInfo.getAddrSpace() >= 256) 201 return SDValue(); 202 203 // ESI might be used as a base pointer, in that case we can't simply overwrite 204 // the register. Fall back to generic code. 205 const X86RegisterInfo *TRI = 206 static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); 207 if (TRI->hasBasePointer(DAG.getMachineFunction()) && 208 TRI->getBaseRegister() == X86::ESI) 209 return SDValue(); 210 211 MVT AVT; 212 if (Align & 1) 213 AVT = MVT::i8; 214 else if (Align & 2) 215 AVT = MVT::i16; 216 else if (Align & 4) 217 // DWORD aligned 218 AVT = MVT::i32; 219 else 220 // QWORD aligned 221 AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; 222 223 unsigned UBytes = AVT.getSizeInBits() / 8; 224 unsigned CountVal = SizeVal / UBytes; 225 SDValue Count = DAG.getIntPtrConstant(CountVal); 226 unsigned BytesLeft = SizeVal % UBytes; 227 228 SDValue InFlag(0, 0); 229 Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : 230 X86::ECX, 231 Count, InFlag); 232 InFlag = Chain.getValue(1); 233 Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : 234 X86::EDI, 235 Dst, InFlag); 236 InFlag = Chain.getValue(1); 237 Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : 238 X86::ESI, 239 Src, InFlag); 240 InFlag = Chain.getValue(1); 241 242 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); 243 SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; 244 SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, 245 array_lengthof(Ops)); 246 247 SmallVector<SDValue, 4> Results; 248 Results.push_back(RepMovs); 249 if (BytesLeft) { 250 // Handle the last 1 - 7 bytes. 
251 unsigned Offset = SizeVal - BytesLeft; 252 EVT DstVT = Dst.getValueType(); 253 EVT SrcVT = Src.getValueType(); 254 EVT SizeVT = Size.getValueType(); 255 Results.push_back(DAG.getMemcpy(Chain, dl, 256 DAG.getNode(ISD::ADD, dl, DstVT, Dst, 257 DAG.getConstant(Offset, DstVT)), 258 DAG.getNode(ISD::ADD, dl, SrcVT, Src, 259 DAG.getConstant(Offset, SrcVT)), 260 DAG.getConstant(BytesLeft, SizeVT), 261 Align, isVolatile, AlwaysInline, 262 DstPtrInfo.getWithOffset(Offset), 263 SrcPtrInfo.getWithOffset(Offset))); 264 } 265 266 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 267 &Results[0], Results.size()); 268} 269