1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the ARMSelectionDAGInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMTargetMachine.h"
15#include "llvm/CodeGen/SelectionDAG.h"
16#include "llvm/IR/DerivedTypes.h"
17using namespace llvm;
18
19#define DEBUG_TYPE "arm-selectiondag-info"
20
21// Emit, if possible, a specialized version of the given Libcall. Typically this
22// means selecting the appropriately aligned version, but we also convert memset
23// of 0 into memclr.
24SDValue ARMSelectionDAGInfo::
25EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
26                       SDValue Chain,
27                       SDValue Dst, SDValue Src,
28                       SDValue Size, unsigned Align,
29                       RTLIB::Libcall LC) const {
30  const ARMSubtarget &Subtarget =
31      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
32  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
33
34  // Only use a specialized AEABI function if the default version of this
35  // Libcall is an AEABI function.
36  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
37    return SDValue();
38
39  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
40  // able to translate memset to memclr and use the value to index the function
41  // name array.
42  enum {
43    AEABI_MEMCPY = 0,
44    AEABI_MEMMOVE,
45    AEABI_MEMSET,
46    AEABI_MEMCLR
47  } AEABILibcall;
48  switch (LC) {
49  case RTLIB::MEMCPY:
50    AEABILibcall = AEABI_MEMCPY;
51    break;
52  case RTLIB::MEMMOVE:
53    AEABILibcall = AEABI_MEMMOVE;
54    break;
55  case RTLIB::MEMSET:
56    AEABILibcall = AEABI_MEMSET;
57    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
58      if (ConstantSrc->getZExtValue() == 0)
59        AEABILibcall = AEABI_MEMCLR;
60    break;
61  default:
62    return SDValue();
63  }
64
65  // Choose the most-aligned libcall variant that we can
66  enum {
67    ALIGN1 = 0,
68    ALIGN4,
69    ALIGN8
70  } AlignVariant;
71  if ((Align & 7) == 0)
72    AlignVariant = ALIGN8;
73  else if ((Align & 3) == 0)
74    AlignVariant = ALIGN4;
75  else
76    AlignVariant = ALIGN1;
77
78  TargetLowering::ArgListTy Args;
79  TargetLowering::ArgListEntry Entry;
80  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
81  Entry.Node = Dst;
82  Args.push_back(Entry);
83  if (AEABILibcall == AEABI_MEMCLR) {
84    Entry.Node = Size;
85    Args.push_back(Entry);
86  } else if (AEABILibcall == AEABI_MEMSET) {
87    // Adjust parameters for memset, EABI uses format (ptr, size, value),
88    // GNU library uses (ptr, value, size)
89    // See RTABI section 4.3.4
90    Entry.Node = Size;
91    Args.push_back(Entry);
92
93    // Extend or truncate the argument to be an i32 value for the call.
94    if (Src.getValueType().bitsGT(MVT::i32))
95      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
96    else if (Src.getValueType().bitsLT(MVT::i32))
97      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
98
99    Entry.Node = Src;
100    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
101    Entry.isSExt = false;
102    Args.push_back(Entry);
103  } else {
104    Entry.Node = Src;
105    Args.push_back(Entry);
106
107    Entry.Node = Size;
108    Args.push_back(Entry);
109  }
110
111  char const *FunctionNames[4][3] = {
112    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
113    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
114    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
115    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
116  };
117  TargetLowering::CallLoweringInfo CLI(DAG);
118  CLI.setDebugLoc(dl)
119      .setChain(Chain)
120      .setCallee(
121           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
122           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
123                                 TLI->getPointerTy(DAG.getDataLayout())),
124           std::move(Args), 0)
125      .setDiscardResult();
126  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
127
128  return CallResult.second;
129}
130
131SDValue
132ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
133                                             SDValue Chain,
134                                             SDValue Dst, SDValue Src,
135                                             SDValue Size, unsigned Align,
136                                             bool isVolatile, bool AlwaysInline,
137                                             MachinePointerInfo DstPtrInfo,
138                                          MachinePointerInfo SrcPtrInfo) const {
139  const ARMSubtarget &Subtarget =
140      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
141  // Do repeated 4-byte loads and stores. To be improved.
142  // This requires 4-byte alignment.
143  if ((Align & 3) != 0)
144    return SDValue();
145  // This requires the copy size to be a constant, preferably
146  // within a subtarget-specific limit.
147  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
148  if (!ConstantSize)
149    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
150                                  RTLIB::MEMCPY);
151  uint64_t SizeVal = ConstantSize->getZExtValue();
152  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
153    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
154                                  RTLIB::MEMCPY);
155
156  unsigned BytesLeft = SizeVal & 3;
157  unsigned NumMemOps = SizeVal >> 2;
158  unsigned EmittedNumMemOps = 0;
159  EVT VT = MVT::i32;
160  unsigned VTSize = 4;
161  unsigned i = 0;
162  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
163  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
164  SDValue TFOps[6];
165  SDValue Loads[6];
166  uint64_t SrcOff = 0, DstOff = 0;
167
168  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
169  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
170  // pressure on the general purpose registers. However this seems harder to map
171  // onto the register allocator's view of the world.
172
173  // The number of MEMCPY pseudo-instructions to emit. We use up to
174  // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
175  // later on. This is a lower bound on the number of MEMCPY operations we must
176  // emit.
177  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
178
179  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
180
181  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
182    // Evenly distribute registers among MEMCPY operations to reduce register
183    // pressure.
184    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
185    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
186
187    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
188                      DAG.getConstant(NumRegs, dl, MVT::i32));
189    Src = Dst.getValue(1);
190    Chain = Dst.getValue(2);
191
192    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
193    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
194
195    EmittedNumMemOps = NextEmittedNumMemOps;
196  }
197
198  if (BytesLeft == 0)
199    return Chain;
200
201  // Issue loads / stores for the trailing (1 - 3) bytes.
202  unsigned BytesLeftSave = BytesLeft;
203  i = 0;
204  while (BytesLeft) {
205    if (BytesLeft >= 2) {
206      VT = MVT::i16;
207      VTSize = 2;
208    } else {
209      VT = MVT::i8;
210      VTSize = 1;
211    }
212
213    Loads[i] = DAG.getLoad(VT, dl, Chain,
214                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
215                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
216                           SrcPtrInfo.getWithOffset(SrcOff),
217                           false, false, false, 0);
218    TFOps[i] = Loads[i].getValue(1);
219    ++i;
220    SrcOff += VTSize;
221    BytesLeft -= VTSize;
222  }
223  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
224                      makeArrayRef(TFOps, i));
225
226  i = 0;
227  BytesLeft = BytesLeftSave;
228  while (BytesLeft) {
229    if (BytesLeft >= 2) {
230      VT = MVT::i16;
231      VTSize = 2;
232    } else {
233      VT = MVT::i8;
234      VTSize = 1;
235    }
236
237    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
238                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
239                                        DAG.getConstant(DstOff, dl, MVT::i32)),
240                            DstPtrInfo.getWithOffset(DstOff), false, false, 0);
241    ++i;
242    DstOff += VTSize;
243    BytesLeft -= VTSize;
244  }
245  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
246                     makeArrayRef(TFOps, i));
247}
248
249
250SDValue ARMSelectionDAGInfo::
251EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
252                         SDValue Chain,
253                         SDValue Dst, SDValue Src,
254                         SDValue Size, unsigned Align,
255                         bool isVolatile,
256                         MachinePointerInfo DstPtrInfo,
257                         MachinePointerInfo SrcPtrInfo) const {
258  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
259                                RTLIB::MEMMOVE);
260}
261
262
263SDValue ARMSelectionDAGInfo::
264EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
265                        SDValue Chain, SDValue Dst,
266                        SDValue Src, SDValue Size,
267                        unsigned Align, bool isVolatile,
268                        MachinePointerInfo DstPtrInfo) const {
269  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
270                                RTLIB::MEMSET);
271}
272