1207618Srdivacky//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2207618Srdivacky//
3207618Srdivacky//                     The LLVM Compiler Infrastructure
4207618Srdivacky//
5207618Srdivacky// This file is distributed under the University of Illinois Open Source
6207618Srdivacky// License. See LICENSE.TXT for details.
7207618Srdivacky//
8207618Srdivacky//===----------------------------------------------------------------------===//
9207618Srdivacky//
10207618Srdivacky// This file implements the ARMSelectionDAGInfo class.
11207618Srdivacky//
12207618Srdivacky//===----------------------------------------------------------------------===//
13207618Srdivacky
14208599Srdivacky#include "ARMTargetMachine.h"
15223017Sdim#include "llvm/CodeGen/SelectionDAG.h"
16249423Sdim#include "llvm/IR/DerivedTypes.h"
17207618Srdivackyusing namespace llvm;
18207618Srdivacky
19276479Sdim#define DEBUG_TYPE "arm-selectiondag-info"
20207618Srdivacky
21288943Sdim// Emit, if possible, a specialized version of the given Libcall. Typically this
22288943Sdim// means selecting the appropriately aligned version, but we also convert memset
23288943Sdim// of 0 into memclr.
24288943SdimSDValue ARMSelectionDAGInfo::
25288943SdimEmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
26288943Sdim                       SDValue Chain,
27288943Sdim                       SDValue Dst, SDValue Src,
28288943Sdim                       SDValue Size, unsigned Align,
29288943Sdim                       RTLIB::Libcall LC) const {
30288943Sdim  const ARMSubtarget &Subtarget =
31288943Sdim      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
32288943Sdim  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
33276479Sdim
34288943Sdim  // Only use a specialized AEABI function if the default version of this
35288943Sdim  // Libcall is an AEABI function.
36288943Sdim  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
37288943Sdim    return SDValue();
38288943Sdim
39288943Sdim  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
40288943Sdim  // able to translate memset to memclr and use the value to index the function
41288943Sdim  // name array.
42288943Sdim  enum {
43288943Sdim    AEABI_MEMCPY = 0,
44288943Sdim    AEABI_MEMMOVE,
45288943Sdim    AEABI_MEMSET,
46288943Sdim    AEABI_MEMCLR
47288943Sdim  } AEABILibcall;
48288943Sdim  switch (LC) {
49288943Sdim  case RTLIB::MEMCPY:
50288943Sdim    AEABILibcall = AEABI_MEMCPY;
51288943Sdim    break;
52288943Sdim  case RTLIB::MEMMOVE:
53288943Sdim    AEABILibcall = AEABI_MEMMOVE;
54288943Sdim    break;
55288943Sdim  case RTLIB::MEMSET:
56288943Sdim    AEABILibcall = AEABI_MEMSET;
57288943Sdim    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
58288943Sdim      if (ConstantSrc->getZExtValue() == 0)
59288943Sdim        AEABILibcall = AEABI_MEMCLR;
60288943Sdim    break;
61288943Sdim  default:
62288943Sdim    return SDValue();
63288943Sdim  }
64288943Sdim
65288943Sdim  // Choose the most-aligned libcall variant that we can
66288943Sdim  enum {
67288943Sdim    ALIGN1 = 0,
68288943Sdim    ALIGN4,
69288943Sdim    ALIGN8
70288943Sdim  } AlignVariant;
71288943Sdim  if ((Align & 7) == 0)
72288943Sdim    AlignVariant = ALIGN8;
73288943Sdim  else if ((Align & 3) == 0)
74288943Sdim    AlignVariant = ALIGN4;
75288943Sdim  else
76288943Sdim    AlignVariant = ALIGN1;
77288943Sdim
78288943Sdim  TargetLowering::ArgListTy Args;
79288943Sdim  TargetLowering::ArgListEntry Entry;
80288943Sdim  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
81288943Sdim  Entry.Node = Dst;
82288943Sdim  Args.push_back(Entry);
83288943Sdim  if (AEABILibcall == AEABI_MEMCLR) {
84288943Sdim    Entry.Node = Size;
85288943Sdim    Args.push_back(Entry);
86288943Sdim  } else if (AEABILibcall == AEABI_MEMSET) {
87288943Sdim    // Adjust parameters for memset, EABI uses format (ptr, size, value),
88288943Sdim    // GNU library uses (ptr, value, size)
89288943Sdim    // See RTABI section 4.3.4
90288943Sdim    Entry.Node = Size;
91288943Sdim    Args.push_back(Entry);
92288943Sdim
93288943Sdim    // Extend or truncate the argument to be an i32 value for the call.
94288943Sdim    if (Src.getValueType().bitsGT(MVT::i32))
95288943Sdim      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
96288943Sdim    else if (Src.getValueType().bitsLT(MVT::i32))
97288943Sdim      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
98288943Sdim
99288943Sdim    Entry.Node = Src;
100288943Sdim    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
101288943Sdim    Entry.isSExt = false;
102288943Sdim    Args.push_back(Entry);
103288943Sdim  } else {
104288943Sdim    Entry.Node = Src;
105288943Sdim    Args.push_back(Entry);
106288943Sdim
107288943Sdim    Entry.Node = Size;
108288943Sdim    Args.push_back(Entry);
109288943Sdim  }
110288943Sdim
111288943Sdim  char const *FunctionNames[4][3] = {
112288943Sdim    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
113288943Sdim    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
114288943Sdim    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
115288943Sdim    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
116288943Sdim  };
117288943Sdim  TargetLowering::CallLoweringInfo CLI(DAG);
118288943Sdim  CLI.setDebugLoc(dl)
119288943Sdim      .setChain(Chain)
120288943Sdim      .setCallee(
121288943Sdim           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
122288943Sdim           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
123288943Sdim                                 TLI->getPointerTy(DAG.getDataLayout())),
124288943Sdim           std::move(Args), 0)
125288943Sdim      .setDiscardResult();
126288943Sdim  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
127288943Sdim
128288943Sdim  return CallResult.second;
129207618Srdivacky}
130208599Srdivacky
131208599SrdivackySDValue
132261991SdimARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
133208599Srdivacky                                             SDValue Chain,
134208599Srdivacky                                             SDValue Dst, SDValue Src,
135208599Srdivacky                                             SDValue Size, unsigned Align,
136208599Srdivacky                                             bool isVolatile, bool AlwaysInline,
137218893Sdim                                             MachinePointerInfo DstPtrInfo,
138218893Sdim                                          MachinePointerInfo SrcPtrInfo) const {
139288943Sdim  const ARMSubtarget &Subtarget =
140288943Sdim      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
141208599Srdivacky  // Do repeated 4-byte loads and stores. To be improved.
142208599Srdivacky  // This requires 4-byte alignment.
143208599Srdivacky  if ((Align & 3) != 0)
144208599Srdivacky    return SDValue();
145221345Sdim  // This requires the copy size to be a constant, preferably
146208599Srdivacky  // within a subtarget-specific limit.
147208599Srdivacky  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
148208599Srdivacky  if (!ConstantSize)
149288943Sdim    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
150288943Sdim                                  RTLIB::MEMCPY);
151208599Srdivacky  uint64_t SizeVal = ConstantSize->getZExtValue();
152276479Sdim  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
153288943Sdim    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
154288943Sdim                                  RTLIB::MEMCPY);
155208599Srdivacky
156208599Srdivacky  unsigned BytesLeft = SizeVal & 3;
157208599Srdivacky  unsigned NumMemOps = SizeVal >> 2;
158208599Srdivacky  unsigned EmittedNumMemOps = 0;
159208599Srdivacky  EVT VT = MVT::i32;
160208599Srdivacky  unsigned VTSize = 4;
161208599Srdivacky  unsigned i = 0;
162276479Sdim  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
163296417Sdim  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
164276479Sdim  SDValue TFOps[6];
165276479Sdim  SDValue Loads[6];
166208599Srdivacky  uint64_t SrcOff = 0, DstOff = 0;
167208599Srdivacky
168296417Sdim  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
169296417Sdim  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
170296417Sdim  // pressure on the general purpose registers. However this seems harder to map
171296417Sdim  // onto the register allocator's view of the world.
172208599Srdivacky
173296417Sdim  // The number of MEMCPY pseudo-instructions to emit. We use up to
174296417Sdim  // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
175296417Sdim  // later on. This is a lower bound on the number of MEMCPY operations we must
176296417Sdim  // emit.
177296417Sdim  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
178208599Srdivacky
179296417Sdim  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
180296417Sdim
181296417Sdim  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
182296417Sdim    // Evenly distribute registers among MEMCPY operations to reduce register
183296417Sdim    // pressure.
184296417Sdim    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
185296417Sdim    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
186296417Sdim
187296417Sdim    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
188296417Sdim                      DAG.getConstant(NumRegs, dl, MVT::i32));
189296417Sdim    Src = Dst.getValue(1);
190296417Sdim    Chain = Dst.getValue(2);
191296417Sdim
192296417Sdim    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
193296417Sdim    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
194296417Sdim
195296417Sdim    EmittedNumMemOps = NextEmittedNumMemOps;
196208599Srdivacky  }
197208599Srdivacky
198208599Srdivacky  if (BytesLeft == 0)
199208599Srdivacky    return Chain;
200208599Srdivacky
201208599Srdivacky  // Issue loads / stores for the trailing (1 - 3) bytes.
202208599Srdivacky  unsigned BytesLeftSave = BytesLeft;
203208599Srdivacky  i = 0;
204208599Srdivacky  while (BytesLeft) {
205208599Srdivacky    if (BytesLeft >= 2) {
206208599Srdivacky      VT = MVT::i16;
207208599Srdivacky      VTSize = 2;
208208599Srdivacky    } else {
209208599Srdivacky      VT = MVT::i8;
210208599Srdivacky      VTSize = 1;
211208599Srdivacky    }
212208599Srdivacky
213208599Srdivacky    Loads[i] = DAG.getLoad(VT, dl, Chain,
214208599Srdivacky                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
215288943Sdim                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
216234353Sdim                           SrcPtrInfo.getWithOffset(SrcOff),
217234353Sdim                           false, false, false, 0);
218208599Srdivacky    TFOps[i] = Loads[i].getValue(1);
219208599Srdivacky    ++i;
220208599Srdivacky    SrcOff += VTSize;
221208599Srdivacky    BytesLeft -= VTSize;
222208599Srdivacky  }
223276479Sdim  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
224276479Sdim                      makeArrayRef(TFOps, i));
225208599Srdivacky
226208599Srdivacky  i = 0;
227208599Srdivacky  BytesLeft = BytesLeftSave;
228208599Srdivacky  while (BytesLeft) {
229208599Srdivacky    if (BytesLeft >= 2) {
230208599Srdivacky      VT = MVT::i16;
231208599Srdivacky      VTSize = 2;
232208599Srdivacky    } else {
233208599Srdivacky      VT = MVT::i8;
234208599Srdivacky      VTSize = 1;
235208599Srdivacky    }
236208599Srdivacky
237208599Srdivacky    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
238208599Srdivacky                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
239288943Sdim                                        DAG.getConstant(DstOff, dl, MVT::i32)),
240218893Sdim                            DstPtrInfo.getWithOffset(DstOff), false, false, 0);
241208599Srdivacky    ++i;
242208599Srdivacky    DstOff += VTSize;
243208599Srdivacky    BytesLeft -= VTSize;
244208599Srdivacky  }
245276479Sdim  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
246276479Sdim                     makeArrayRef(TFOps, i));
247208599Srdivacky}
248223017Sdim
249288943Sdim
250226633SdimSDValue ARMSelectionDAGInfo::
251288943SdimEmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
252288943Sdim                         SDValue Chain,
253288943Sdim                         SDValue Dst, SDValue Src,
254288943Sdim                         SDValue Size, unsigned Align,
255288943Sdim                         bool isVolatile,
256288943Sdim                         MachinePointerInfo DstPtrInfo,
257288943Sdim                         MachinePointerInfo SrcPtrInfo) const {
258288943Sdim  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
259288943Sdim                                RTLIB::MEMMOVE);
260288943Sdim}
261288943Sdim
262288943Sdim
263288943SdimSDValue ARMSelectionDAGInfo::
264261991SdimEmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
265226633Sdim                        SDValue Chain, SDValue Dst,
266226633Sdim                        SDValue Src, SDValue Size,
267226633Sdim                        unsigned Align, bool isVolatile,
268226633Sdim                        MachinePointerInfo DstPtrInfo) const {
269288943Sdim  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
270288943Sdim                                RTLIB::MEMSET);
271223017Sdim}
272