1207618Srdivacky//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// 2207618Srdivacky// 3207618Srdivacky// The LLVM Compiler Infrastructure 4207618Srdivacky// 5207618Srdivacky// This file is distributed under the University of Illinois Open Source 6207618Srdivacky// License. See LICENSE.TXT for details. 7207618Srdivacky// 8207618Srdivacky//===----------------------------------------------------------------------===// 9207618Srdivacky// 10207618Srdivacky// This file implements the ARMSelectionDAGInfo class. 11207618Srdivacky// 12207618Srdivacky//===----------------------------------------------------------------------===// 13207618Srdivacky 14208599Srdivacky#include "ARMTargetMachine.h" 15223017Sdim#include "llvm/CodeGen/SelectionDAG.h" 16249423Sdim#include "llvm/IR/DerivedTypes.h" 17207618Srdivackyusing namespace llvm; 18207618Srdivacky 19276479Sdim#define DEBUG_TYPE "arm-selectiondag-info" 20207618Srdivacky 21288943Sdim// Emit, if possible, a specialized version of the given Libcall. Typically this 22288943Sdim// means selecting the appropriately aligned version, but we also convert memset 23288943Sdim// of 0 into memclr. 24288943SdimSDValue ARMSelectionDAGInfo:: 25288943SdimEmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, 26288943Sdim SDValue Chain, 27288943Sdim SDValue Dst, SDValue Src, 28288943Sdim SDValue Size, unsigned Align, 29288943Sdim RTLIB::Libcall LC) const { 30288943Sdim const ARMSubtarget &Subtarget = 31288943Sdim DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 32288943Sdim const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); 33276479Sdim 34288943Sdim // Only use a specialized AEABI function if the default version of this 35288943Sdim // Libcall is an AEABI function. 36288943Sdim if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) 37288943Sdim return SDValue(); 38288943Sdim 39288943Sdim // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be 40288943Sdim // able to translate memset to memclr and use the value to index the function 41288943Sdim // name array. 42288943Sdim enum { 43288943Sdim AEABI_MEMCPY = 0, 44288943Sdim AEABI_MEMMOVE, 45288943Sdim AEABI_MEMSET, 46288943Sdim AEABI_MEMCLR 47288943Sdim } AEABILibcall; 48288943Sdim switch (LC) { 49288943Sdim case RTLIB::MEMCPY: 50288943Sdim AEABILibcall = AEABI_MEMCPY; 51288943Sdim break; 52288943Sdim case RTLIB::MEMMOVE: 53288943Sdim AEABILibcall = AEABI_MEMMOVE; 54288943Sdim break; 55288943Sdim case RTLIB::MEMSET: 56288943Sdim AEABILibcall = AEABI_MEMSET; 57288943Sdim if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) 58288943Sdim if (ConstantSrc->getZExtValue() == 0) 59288943Sdim AEABILibcall = AEABI_MEMCLR; 60288943Sdim break; 61288943Sdim default: 62288943Sdim return SDValue(); 63288943Sdim } 64288943Sdim 65288943Sdim // Choose the most-aligned libcall variant that we can 66288943Sdim enum { 67288943Sdim ALIGN1 = 0, 68288943Sdim ALIGN4, 69288943Sdim ALIGN8 70288943Sdim } AlignVariant; 71288943Sdim if ((Align & 7) == 0) 72288943Sdim AlignVariant = ALIGN8; 73288943Sdim else if ((Align & 3) == 0) 74288943Sdim AlignVariant = ALIGN4; 75288943Sdim else 76288943Sdim AlignVariant = ALIGN1; 77288943Sdim 78288943Sdim TargetLowering::ArgListTy Args; 79288943Sdim TargetLowering::ArgListEntry Entry; 80288943Sdim Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 81288943Sdim Entry.Node = Dst; 82288943Sdim Args.push_back(Entry); 83288943Sdim if (AEABILibcall == AEABI_MEMCLR) { 84288943Sdim Entry.Node = Size; 85288943Sdim Args.push_back(Entry); 86288943Sdim } else if (AEABILibcall == AEABI_MEMSET) { 87288943Sdim // Adjust parameters for memset, EABI uses format (ptr, size, value), 88288943Sdim // GNU library uses (ptr, value, size) 89288943Sdim // See RTABI section 4.3.4 90288943Sdim Entry.Node = Size; 91288943Sdim Args.push_back(Entry); 92288943Sdim 93288943Sdim // Extend or truncate the argument to be an i32 value for the call. 94288943Sdim if (Src.getValueType().bitsGT(MVT::i32)) 95288943Sdim Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); 96288943Sdim else if (Src.getValueType().bitsLT(MVT::i32)) 97288943Sdim Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); 98288943Sdim 99288943Sdim Entry.Node = Src; 100288943Sdim Entry.Ty = Type::getInt32Ty(*DAG.getContext()); 101288943Sdim Entry.isSExt = false; 102288943Sdim Args.push_back(Entry); 103288943Sdim } else { 104288943Sdim Entry.Node = Src; 105288943Sdim Args.push_back(Entry); 106288943Sdim 107288943Sdim Entry.Node = Size; 108288943Sdim Args.push_back(Entry); 109288943Sdim } 110288943Sdim 111288943Sdim char const *FunctionNames[4][3] = { 112288943Sdim { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, 113288943Sdim { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, 114288943Sdim { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, 115288943Sdim { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } 116288943Sdim }; 117288943Sdim TargetLowering::CallLoweringInfo CLI(DAG); 118288943Sdim CLI.setDebugLoc(dl) 119288943Sdim .setChain(Chain) 120288943Sdim .setCallee( 121288943Sdim TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), 122288943Sdim DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], 123288943Sdim TLI->getPointerTy(DAG.getDataLayout())), 124288943Sdim std::move(Args), 0) 125288943Sdim .setDiscardResult(); 126288943Sdim std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 127288943Sdim 128288943Sdim return CallResult.second; 129207618Srdivacky} 130208599Srdivacky 131208599SrdivackySDValue 132261991SdimARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, 133208599Srdivacky SDValue Chain, 134208599Srdivacky SDValue Dst, SDValue Src, 135208599Srdivacky SDValue Size, unsigned Align, 136208599Srdivacky bool isVolatile, bool AlwaysInline, 137218893Sdim MachinePointerInfo DstPtrInfo, 138218893Sdim MachinePointerInfo SrcPtrInfo) const { 139288943Sdim const ARMSubtarget &Subtarget = 140288943Sdim DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 141208599Srdivacky // Do repeated 4-byte loads and stores. To be improved. 142208599Srdivacky // This requires 4-byte alignment. 143208599Srdivacky if ((Align & 3) != 0) 144208599Srdivacky return SDValue(); 145221345Sdim // This requires the copy size to be a constant, preferably 146208599Srdivacky // within a subtarget-specific limit. 147208599Srdivacky ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 148208599Srdivacky if (!ConstantSize) 149288943Sdim return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 150288943Sdim RTLIB::MEMCPY); 151208599Srdivacky uint64_t SizeVal = ConstantSize->getZExtValue(); 152276479Sdim if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 153288943Sdim return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 154288943Sdim RTLIB::MEMCPY); 155208599Srdivacky 156208599Srdivacky unsigned BytesLeft = SizeVal & 3; 157208599Srdivacky unsigned NumMemOps = SizeVal >> 2; 158208599Srdivacky unsigned EmittedNumMemOps = 0; 159208599Srdivacky EVT VT = MVT::i32; 160208599Srdivacky unsigned VTSize = 4; 161208599Srdivacky unsigned i = 0; 162276479Sdim // Emit a maximum of 4 loads in Thumb1 since we have fewer registers 163296417Sdim const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; 164276479Sdim SDValue TFOps[6]; 165276479Sdim SDValue Loads[6]; 166208599Srdivacky uint64_t SrcOff = 0, DstOff = 0; 167208599Srdivacky 168296417Sdim // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to 169296417Sdim // VLDM/VSTM and make this code emit it when appropriate. This would reduce 170296417Sdim // pressure on the general purpose registers. However this seems harder to map 171296417Sdim // onto the register allocator's view of the world. 172208599Srdivacky 173296417Sdim // The number of MEMCPY pseudo-instructions to emit. We use up to 174296417Sdim // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm 175296417Sdim // later on. This is a lower bound on the number of MEMCPY operations we must 176296417Sdim // emit. 177296417Sdim unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; 178208599Srdivacky 179296417Sdim SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); 180296417Sdim 181296417Sdim for (unsigned I = 0; I != NumMEMCPYs; ++I) { 182296417Sdim // Evenly distribute registers among MEMCPY operations to reduce register 183296417Sdim // pressure. 184296417Sdim unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; 185296417Sdim unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; 186296417Sdim 187296417Sdim Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, 188296417Sdim DAG.getConstant(NumRegs, dl, MVT::i32)); 189296417Sdim Src = Dst.getValue(1); 190296417Sdim Chain = Dst.getValue(2); 191296417Sdim 192296417Sdim DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); 193296417Sdim SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); 194296417Sdim 195296417Sdim EmittedNumMemOps = NextEmittedNumMemOps; 196208599Srdivacky } 197208599Srdivacky 198208599Srdivacky if (BytesLeft == 0) 199208599Srdivacky return Chain; 200208599Srdivacky 201208599Srdivacky // Issue loads / stores for the trailing (1 - 3) bytes. 202208599Srdivacky unsigned BytesLeftSave = BytesLeft; 203208599Srdivacky i = 0; 204208599Srdivacky while (BytesLeft) { 205208599Srdivacky if (BytesLeft >= 2) { 206208599Srdivacky VT = MVT::i16; 207208599Srdivacky VTSize = 2; 208208599Srdivacky } else { 209208599Srdivacky VT = MVT::i8; 210208599Srdivacky VTSize = 1; 211208599Srdivacky } 212208599Srdivacky 213208599Srdivacky Loads[i] = DAG.getLoad(VT, dl, Chain, 214208599Srdivacky DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 215288943Sdim DAG.getConstant(SrcOff, dl, MVT::i32)), 216234353Sdim SrcPtrInfo.getWithOffset(SrcOff), 217234353Sdim false, false, false, 0); 218208599Srdivacky TFOps[i] = Loads[i].getValue(1); 219208599Srdivacky ++i; 220208599Srdivacky SrcOff += VTSize; 221208599Srdivacky BytesLeft -= VTSize; 222208599Srdivacky } 223276479Sdim Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 224276479Sdim makeArrayRef(TFOps, i)); 225208599Srdivacky 226208599Srdivacky i = 0; 227208599Srdivacky BytesLeft = BytesLeftSave; 228208599Srdivacky while (BytesLeft) { 229208599Srdivacky if (BytesLeft >= 2) { 230208599Srdivacky VT = MVT::i16; 231208599Srdivacky VTSize = 2; 232208599Srdivacky } else { 233208599Srdivacky VT = MVT::i8; 234208599Srdivacky VTSize = 1; 235208599Srdivacky } 236208599Srdivacky 237208599Srdivacky TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 238208599Srdivacky DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 239288943Sdim DAG.getConstant(DstOff, dl, MVT::i32)), 240218893Sdim DstPtrInfo.getWithOffset(DstOff), false, false, 0); 241208599Srdivacky ++i; 242208599Srdivacky DstOff += VTSize; 243208599Srdivacky BytesLeft -= VTSize; 244208599Srdivacky } 245276479Sdim return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 246276479Sdim makeArrayRef(TFOps, i)); 247208599Srdivacky} 248223017Sdim 249288943Sdim 250226633SdimSDValue ARMSelectionDAGInfo:: 251288943SdimEmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, 252288943Sdim SDValue Chain, 253288943Sdim SDValue Dst, SDValue Src, 254288943Sdim SDValue Size, unsigned Align, 255288943Sdim bool isVolatile, 256288943Sdim MachinePointerInfo DstPtrInfo, 257288943Sdim MachinePointerInfo SrcPtrInfo) const { 258288943Sdim return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 259288943Sdim RTLIB::MEMMOVE); 260288943Sdim} 261288943Sdim 262288943Sdim 263288943SdimSDValue ARMSelectionDAGInfo:: 264261991SdimEmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 265226633Sdim SDValue Chain, SDValue Dst, 266226633Sdim SDValue Src, SDValue Size, 267226633Sdim unsigned Align, bool isVolatile, 268226633Sdim MachinePointerInfo DstPtrInfo) const { 269288943Sdim return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 270288943Sdim RTLIB::MEMSET); 271223017Sdim} 272