1259698Sdim//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// 2259698Sdim// 3259698Sdim// The LLVM Compiler Infrastructure 4259698Sdim// 5259698Sdim// This file is distributed under the University of Illinois Open Source 6259698Sdim// License. See LICENSE.TXT for details. 7259698Sdim// 8259698Sdim//===----------------------------------------------------------------------===// 9259698Sdim// 10259698Sdim// This file implements the SystemZSelectionDAGInfo class. 11259698Sdim// 12259698Sdim//===----------------------------------------------------------------------===// 13259698Sdim 14259698Sdim#define DEBUG_TYPE "systemz-selectiondag-info" 15259698Sdim#include "SystemZTargetMachine.h" 16259698Sdim#include "llvm/CodeGen/SelectionDAG.h" 17259698Sdim 18259698Sdimusing namespace llvm; 19259698Sdim 20259698SdimSystemZSelectionDAGInfo:: 21259698SdimSystemZSelectionDAGInfo(const SystemZTargetMachine &TM) 22259698Sdim : TargetSelectionDAGInfo(TM) { 23259698Sdim} 24259698Sdim 25259698SdimSystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { 26259698Sdim} 27259698Sdim 28259698Sdim// Decide whether it is best to use a loop or straight-line code for 29259698Sdim// a block operation of Size bytes with source address Src and destination 30259698Sdim// address Dest. Sequence is the opcode to use for straight-line code 31259698Sdim// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). 32259698Sdim// Return the chain for the completed operation. 33259698Sdimstatic SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, 34259698Sdim unsigned Loop, SDValue Chain, SDValue Dst, 35259698Sdim SDValue Src, uint64_t Size) { 36259698Sdim EVT PtrVT = Src.getValueType(); 37259698Sdim // The heuristic we use is to prefer loops for anything that would 38259698Sdim // require 7 or more MVCs. With these kinds of sizes there isn't 39259698Sdim // much to choose between straight-line code and looping code, 40259698Sdim // since the time will be dominated by the MVCs themselves. 41259698Sdim // However, the loop has 4 or 5 instructions (depending on whether 42259698Sdim // the base addresses can be proved equal), so there doesn't seem 43259698Sdim // much point using a loop for 5 * 256 bytes or fewer. Anything in 44259698Sdim // the range (5 * 256, 6 * 256) will need another instruction after 45259698Sdim // the loop, so it doesn't seem worth using a loop then either. 46259698Sdim // The next value up, 6 * 256, can be implemented in the same 47259698Sdim // number of straight-line MVCs as 6 * 256 - 1. 48259698Sdim if (Size > 6 * 256) 49259698Sdim return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, 50259698Sdim DAG.getConstant(Size, PtrVT), 51259698Sdim DAG.getConstant(Size / 256, PtrVT)); 52259698Sdim return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, 53259698Sdim DAG.getConstant(Size, PtrVT)); 54259698Sdim} 55259698Sdim 56259698SdimSDValue SystemZSelectionDAGInfo:: 57259698SdimEmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 58259698Sdim SDValue Dst, SDValue Src, SDValue Size, unsigned Align, 59259698Sdim bool IsVolatile, bool AlwaysInline, 60259698Sdim MachinePointerInfo DstPtrInfo, 61259698Sdim MachinePointerInfo SrcPtrInfo) const { 62259698Sdim if (IsVolatile) 63259698Sdim return SDValue(); 64259698Sdim 65259698Sdim if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) 66259698Sdim return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 67259698Sdim Chain, Dst, Src, CSize->getZExtValue()); 68259698Sdim return SDValue(); 69259698Sdim} 70259698Sdim 71259698Sdim// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by 72259698Sdim// Chain, Dst, ByteVal and Size. These cases are expected to use 73259698Sdim// MVI, MVHHI, MVHI and MVGHI respectively. 74259698Sdimstatic SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 75259698Sdim SDValue Dst, uint64_t ByteVal, uint64_t Size, 76259698Sdim unsigned Align, 77259698Sdim MachinePointerInfo DstPtrInfo) { 78259698Sdim uint64_t StoreVal = ByteVal; 79259698Sdim for (unsigned I = 1; I < Size; ++I) 80259698Sdim StoreVal |= ByteVal << (I * 8); 81259698Sdim return DAG.getStore(Chain, DL, 82259698Sdim DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), 83259698Sdim Dst, DstPtrInfo, false, false, Align); 84259698Sdim} 85259698Sdim 86259698SdimSDValue SystemZSelectionDAGInfo:: 87259698SdimEmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 88259698Sdim SDValue Dst, SDValue Byte, SDValue Size, 89259698Sdim unsigned Align, bool IsVolatile, 90259698Sdim MachinePointerInfo DstPtrInfo) const { 91259698Sdim EVT PtrVT = Dst.getValueType(); 92259698Sdim 93259698Sdim if (IsVolatile) 94259698Sdim return SDValue(); 95259698Sdim 96259698Sdim if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { 97259698Sdim uint64_t Bytes = CSize->getZExtValue(); 98259698Sdim if (Bytes == 0) 99259698Sdim return SDValue(); 100259698Sdim if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) { 101259698Sdim // Handle cases that can be done using at most two of 102259698Sdim // MVI, MVHI, MVHHI and MVGHI. The latter two can only be 103259698Sdim // used if ByteVal is all zeros or all ones; in other casees, 104259698Sdim // we can move at most 2 halfwords. 105259698Sdim uint64_t ByteVal = CByte->getZExtValue(); 106259698Sdim if (ByteVal == 0 || ByteVal == 255 ? 107259698Sdim Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : 108259698Sdim Bytes <= 4) { 109259698Sdim unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); 110259698Sdim unsigned Size2 = Bytes - Size1; 111259698Sdim SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, 112259698Sdim Align, DstPtrInfo); 113259698Sdim if (Size2 == 0) 114259698Sdim return Chain1; 115259698Sdim Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 116259698Sdim DAG.getConstant(Size1, PtrVT)); 117259698Sdim DstPtrInfo = DstPtrInfo.getWithOffset(Size1); 118259698Sdim SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, 119259698Sdim std::min(Align, Size1), DstPtrInfo); 120259698Sdim return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 121259698Sdim } 122259698Sdim } else { 123259698Sdim // Handle one and two bytes using STC. 124259698Sdim if (Bytes <= 2) { 125259698Sdim SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 126259698Sdim false, false, Align); 127259698Sdim if (Bytes == 1) 128259698Sdim return Chain1; 129259698Sdim SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 130259698Sdim DAG.getConstant(1, PtrVT)); 131259698Sdim SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, 132259698Sdim DstPtrInfo.getWithOffset(1), 133259698Sdim false, false, 1); 134259698Sdim return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 135259698Sdim } 136259698Sdim } 137259698Sdim assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); 138259698Sdim 139259698Sdim // Handle the special case of a memset of 0, which can use XC. 140259698Sdim ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte); 141259698Sdim if (CByte && CByte->getZExtValue() == 0) 142259698Sdim return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, 143259698Sdim Chain, Dst, Dst, Bytes); 144259698Sdim 145259698Sdim // Copy the byte to the first location and then use MVC to copy 146259698Sdim // it to the rest. 147259698Sdim Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 148259698Sdim false, false, Align); 149259698Sdim SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 150259698Sdim DAG.getConstant(1, PtrVT)); 151259698Sdim return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 152259698Sdim Chain, DstPlus1, Dst, Bytes - 1); 153259698Sdim } 154259698Sdim return SDValue(); 155259698Sdim} 156259698Sdim 157259698Sdim// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), 158259698Sdim// deciding whether to use a loop or straight-line code. 159259698Sdimstatic SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 160259698Sdim SDValue Src1, SDValue Src2, uint64_t Size) { 161259698Sdim SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 162259698Sdim EVT PtrVT = Src1.getValueType(); 163259698Sdim // A two-CLC sequence is a clear win over a loop, not least because it 164259698Sdim // needs only one branch. A three-CLC sequence needs the same number 165259698Sdim // of branches as a loop (i.e. 2), but is shorter. That brings us to 166259698Sdim // lengths greater than 768 bytes. It seems relatively likely that 167259698Sdim // a difference will be found within the first 768 bytes, so we just 168259698Sdim // optimize for the smallest number of branch instructions, in order 169259698Sdim // to avoid polluting the prediction buffer too much. A loop only ever 170259698Sdim // needs 2 branches, whereas a straight-line sequence would need 3 or more. 171259698Sdim if (Size > 3 * 256) 172259698Sdim return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, 173259698Sdim DAG.getConstant(Size, PtrVT), 174259698Sdim DAG.getConstant(Size / 256, PtrVT)); 175259698Sdim return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, 176259698Sdim DAG.getConstant(Size, PtrVT)); 177259698Sdim} 178259698Sdim 179259698Sdim// Convert the current CC value into an integer that is 0 if CC == 0, 180259698Sdim// less than zero if CC == 1 and greater than zero if CC >= 2. 181259698Sdim// The sequence starts with IPM, which puts CC into bits 29 and 28 182259698Sdim// of an integer and clears bits 30 and 31. 183259698Sdimstatic SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { 184259698Sdim SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 185259698Sdim SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 186259698Sdim DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); 187259698Sdim SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, 188259698Sdim DAG.getConstant(31, MVT::i32)); 189259698Sdim return ROTL; 190259698Sdim} 191259698Sdim 192259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 193259698SdimEmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 194259698Sdim SDValue Src1, SDValue Src2, SDValue Size, 195259698Sdim MachinePointerInfo Op1PtrInfo, 196259698Sdim MachinePointerInfo Op2PtrInfo) const { 197259698Sdim if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { 198259698Sdim uint64_t Bytes = CSize->getZExtValue(); 199259698Sdim assert(Bytes > 0 && "Caller should have handled 0-size case"); 200259698Sdim Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); 201259698Sdim SDValue Glue = Chain.getValue(1); 202259698Sdim return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 203259698Sdim } 204259698Sdim return std::make_pair(SDValue(), SDValue()); 205259698Sdim} 206259698Sdim 207259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 208259698SdimEmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 209259698Sdim SDValue Src, SDValue Char, SDValue Length, 210259698Sdim MachinePointerInfo SrcPtrInfo) const { 211259698Sdim // Use SRST to find the character. End is its address on success. 212259698Sdim EVT PtrVT = Src.getValueType(); 213259698Sdim SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 214259698Sdim Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); 215259698Sdim Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); 216259698Sdim Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, 217259698Sdim DAG.getConstant(255, MVT::i32)); 218259698Sdim SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); 219259698Sdim SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 220259698Sdim Limit, Src, Char); 221259698Sdim Chain = End.getValue(1); 222259698Sdim SDValue Glue = End.getValue(2); 223259698Sdim 224259698Sdim // Now select between End and null, depending on whether the character 225259698Sdim // was found. 226259698Sdim SmallVector<SDValue, 5> Ops; 227259698Sdim Ops.push_back(End); 228259698Sdim Ops.push_back(DAG.getConstant(0, PtrVT)); 229259698Sdim Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); 230259698Sdim Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); 231259698Sdim Ops.push_back(Glue); 232259698Sdim VTs = DAG.getVTList(PtrVT, MVT::Glue); 233259698Sdim End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); 234259698Sdim return std::make_pair(End, Chain); 235259698Sdim} 236259698Sdim 237259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 238259698SdimEmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 239259698Sdim SDValue Dest, SDValue Src, 240259698Sdim MachinePointerInfo DestPtrInfo, 241259698Sdim MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { 242259698Sdim SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); 243259698Sdim SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, 244259698Sdim DAG.getConstant(0, MVT::i32)); 245259698Sdim return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); 246259698Sdim} 247259698Sdim 248259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 249259698SdimEmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 250259698Sdim SDValue Src1, SDValue Src2, 251259698Sdim MachinePointerInfo Op1PtrInfo, 252259698Sdim MachinePointerInfo Op2PtrInfo) const { 253259698Sdim SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); 254259698Sdim SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, 255259698Sdim DAG.getConstant(0, MVT::i32)); 256259698Sdim Chain = Unused.getValue(1); 257259698Sdim SDValue Glue = Chain.getValue(2); 258259698Sdim return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 259259698Sdim} 260259698Sdim 261259698Sdim// Search from Src for a null character, stopping once Src reaches Limit. 262259698Sdim// Return a pair of values, the first being the number of nonnull characters 263259698Sdim// and the second being the out chain. 264259698Sdim// 265259698Sdim// This can be used for strlen by setting Limit to 0. 266259698Sdimstatic std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, 267259698Sdim SDValue Chain, SDValue Src, 268259698Sdim SDValue Limit) { 269259698Sdim EVT PtrVT = Src.getValueType(); 270259698Sdim SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 271259698Sdim SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 272259698Sdim Limit, Src, DAG.getConstant(0, MVT::i32)); 273259698Sdim Chain = End.getValue(1); 274259698Sdim SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); 275259698Sdim return std::make_pair(Len, Chain); 276259698Sdim} 277259698Sdim 278259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 279259698SdimEmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 280259698Sdim SDValue Src, MachinePointerInfo SrcPtrInfo) const { 281259698Sdim EVT PtrVT = Src.getValueType(); 282259698Sdim return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT)); 283259698Sdim} 284259698Sdim 285259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 286259698SdimEmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 287259698Sdim SDValue Src, SDValue MaxLength, 288259698Sdim MachinePointerInfo SrcPtrInfo) const { 289259698Sdim EVT PtrVT = Src.getValueType(); 290259698Sdim MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); 291259698Sdim SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); 292259698Sdim return getBoundedStrlen(DAG, DL, Chain, Src, Limit); 293259698Sdim} 294