1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86ISelLowering.h"
15#include "X86InstrInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
18#include "llvm/CodeGen/SelectionDAG.h"
19#include "llvm/CodeGen/TargetLowering.h"
20#include "llvm/IR/DerivedTypes.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "x86-selectiondag-info"
25
26bool X86SelectionDAGInfo::isBaseRegConflictPossible(
27    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
28  // We cannot use TRI->hasBasePointer() until *after* we select all basic
29  // blocks.  Legalization may introduce new stack temporaries with large
30  // alignment requirements.  Fall back to generic code if there are any
31  // dynamic stack adjustments (hopefully rare) and the base pointer would
32  // conflict if we had to use it.
33  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
34  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
35    return false;
36
37  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
38      DAG.getSubtarget().getRegisterInfo());
39  Register BaseReg = TRI->getBaseRegister();
40  for (unsigned R : ClobberSet)
41    if (BaseReg == R)
42      return true;
43  return false;
44}
45
46SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
47    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
48    SDValue Size, unsigned Align, bool isVolatile,
49    MachinePointerInfo DstPtrInfo) const {
50  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
51  const X86Subtarget &Subtarget =
52      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
53
54#ifndef NDEBUG
55  // If the base register might conflict with our physical registers, bail out.
56  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
57                                  X86::ECX, X86::EAX, X86::EDI};
58  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
59#endif
60
61  // If to a segment-relative address space, use the default lowering.
62  if (DstPtrInfo.getAddrSpace() >= 256)
63    return SDValue();
64
65  // If not DWORD aligned or size is more than the threshold, call the library.
66  // The libc version is likely to be faster for these cases. It can use the
67  // address value and run time information about the CPU.
68  if ((Align & 3) != 0 || !ConstantSize ||
69      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
70    // Check to see if there is a specialized entry-point for memory zeroing.
71    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
72
73    if (const char *bzeroName = (ValC && ValC->isNullValue())
74        ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
75        : nullptr) {
76      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
77      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
78      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
79      TargetLowering::ArgListTy Args;
80      TargetLowering::ArgListEntry Entry;
81      Entry.Node = Dst;
82      Entry.Ty = IntPtrTy;
83      Args.push_back(Entry);
84      Entry.Node = Size;
85      Args.push_back(Entry);
86
87      TargetLowering::CallLoweringInfo CLI(DAG);
88      CLI.setDebugLoc(dl)
89          .setChain(Chain)
90          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
91                        DAG.getExternalSymbol(bzeroName, IntPtr),
92                        std::move(Args))
93          .setDiscardResult();
94
95      std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
96      return CallResult.second;
97    }
98
99    // Otherwise have the target-independent code call memset.
100    return SDValue();
101  }
102
103  uint64_t SizeVal = ConstantSize->getZExtValue();
104  SDValue InFlag;
105  EVT AVT;
106  SDValue Count;
107  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
108  unsigned BytesLeft = 0;
109  if (ValC) {
110    unsigned ValReg;
111    uint64_t Val = ValC->getZExtValue() & 255;
112
113    // If the value is a constant, then we can potentially use larger sets.
114    switch (Align & 3) {
115    case 2:   // WORD aligned
116      AVT = MVT::i16;
117      ValReg = X86::AX;
118      Val = (Val << 8) | Val;
119      break;
120    case 0:  // DWORD aligned
121      AVT = MVT::i32;
122      ValReg = X86::EAX;
123      Val = (Val << 8)  | Val;
124      Val = (Val << 16) | Val;
125      if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
126        AVT = MVT::i64;
127        ValReg = X86::RAX;
128        Val = (Val << 32) | Val;
129      }
130      break;
131    default:  // Byte aligned
132      AVT = MVT::i8;
133      ValReg = X86::AL;
134      Count = DAG.getIntPtrConstant(SizeVal, dl);
135      break;
136    }
137
138    if (AVT.bitsGT(MVT::i8)) {
139      unsigned UBytes = AVT.getSizeInBits() / 8;
140      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
141      BytesLeft = SizeVal % UBytes;
142    }
143
144    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
145                             InFlag);
146    InFlag = Chain.getValue(1);
147  } else {
148    AVT = MVT::i8;
149    Count  = DAG.getIntPtrConstant(SizeVal, dl);
150    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
151    InFlag = Chain.getValue(1);
152  }
153
154  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
155  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
156                           Count, InFlag);
157  InFlag = Chain.getValue(1);
158  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
159                           Dst, InFlag);
160  InFlag = Chain.getValue(1);
161
162  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
163  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
164  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
165
166  if (BytesLeft) {
167    // Handle the last 1 - 7 bytes.
168    unsigned Offset = SizeVal - BytesLeft;
169    EVT AddrVT = Dst.getValueType();
170    EVT SizeVT = Size.getValueType();
171
172    Chain = DAG.getMemset(Chain, dl,
173                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
174                                      DAG.getConstant(Offset, dl, AddrVT)),
175                          Val,
176                          DAG.getConstant(BytesLeft, dl, SizeVT),
177                          Align, isVolatile, false,
178                          DstPtrInfo.getWithOffset(Offset));
179  }
180
181  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
182  return Chain;
183}
184
185/// Emit a single REP MOVS{B,W,D,Q} instruction.
186static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
187                           const SDLoc &dl, SDValue Chain, SDValue Dst,
188                           SDValue Src, SDValue Size, MVT AVT) {
189  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
190  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
191  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
192  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
193
194  SDValue InFlag;
195  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
196  InFlag = Chain.getValue(1);
197  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
198  InFlag = Chain.getValue(1);
199  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
200  InFlag = Chain.getValue(1);
201
202  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
203  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
204  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
205}
206
207/// Emit a single REP MOVSB instruction for a particular constant size.
208static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
209                            const SDLoc &dl, SDValue Chain, SDValue Dst,
210                            SDValue Src, uint64_t Size) {
211  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
212                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
213}
214
215/// Returns the best type to use with repmovs depending on alignment.
216static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
217                                 uint64_t Align) {
218  assert((Align != 0) && "Align is normalized");
219  assert(isPowerOf2_64(Align) && "Align is a power of 2");
220  switch (Align) {
221  case 1:
222    return MVT::i8;
223  case 2:
224    return MVT::i16;
225  case 4:
226    return MVT::i32;
227  default:
228    return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
229  }
230}
231
232/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
233/// a constant size memory copy. In some cases where we know REP MOVS is
234/// inefficient we return an empty SDValue so the calling code can either
235/// generate a load/store sequence or call the runtime memcpy function.
236static SDValue emitConstantSizeRepmov(
237    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
238    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
239    unsigned Align, bool isVolatile, bool AlwaysInline,
240    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
241
242  /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
243  /// efficient.
244  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
245    return SDValue();
246
247  /// If we have enhanced repmovs we use it.
248  if (Subtarget.hasERMSB())
249    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
250
251  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
252  /// We assume runtime memcpy will do a better job for unaligned copies when
253  /// ERMS is not present.
254  if (!AlwaysInline && (Align & 3) != 0)
255    return SDValue();
256
257  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
258  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
259  const uint64_t BlockCount = Size / BlockBytes;
260  const uint64_t BytesLeft = Size % BlockBytes;
261  SDValue RepMovs =
262      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
263                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);
264
265  /// RepMov can process the whole length.
266  if (BytesLeft == 0)
267    return RepMovs;
268
269  assert(BytesLeft && "We have leftover at this point");
270
271  /// In case we optimize for size we use repmovsb even if it's less efficient
272  /// so we can save the loads/stores of the leftover.
273  if (DAG.getMachineFunction().getFunction().hasMinSize())
274    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
275
276  // Handle the last 1 - 7 bytes.
277  SmallVector<SDValue, 4> Results;
278  Results.push_back(RepMovs);
279  unsigned Offset = Size - BytesLeft;
280  EVT DstVT = Dst.getValueType();
281  EVT SrcVT = Src.getValueType();
282  Results.push_back(DAG.getMemcpy(
283      Chain, dl,
284      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
285      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
286      DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile,
287      /*AlwaysInline*/ true, /*isTailCall*/ false,
288      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
289  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
290}
291
292SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
293    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
294    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
295    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
296  // If to a segment-relative address space, use the default lowering.
297  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
298    return SDValue();
299
300  // If the base registers conflict with our physical registers, use the default
301  // lowering.
302  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
303                                  X86::ECX, X86::ESI, X86::EDI};
304  if (isBaseRegConflictPossible(DAG, ClobberSet))
305    return SDValue();
306
307  const X86Subtarget &Subtarget =
308      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
309
310  /// Handle constant sizes,
311  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
312    return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
313                                  ConstantSize->getZExtValue(),
314                                  Size.getValueType(), Align, isVolatile,
315                                  AlwaysInline, DstPtrInfo, SrcPtrInfo);
316
317  return SDValue();
318}
319