1259698Sdim//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
2259698Sdim//
3259698Sdim//                     The LLVM Compiler Infrastructure
4259698Sdim//
5259698Sdim// This file is distributed under the University of Illinois Open Source
6259698Sdim// License. See LICENSE.TXT for details.
7259698Sdim//
8259698Sdim//===----------------------------------------------------------------------===//
9259698Sdim//
10259698Sdim// This file implements the SystemZSelectionDAGInfo class.
11259698Sdim//
12259698Sdim//===----------------------------------------------------------------------===//
13259698Sdim
14259698Sdim#define DEBUG_TYPE "systemz-selectiondag-info"
15259698Sdim#include "SystemZTargetMachine.h"
16259698Sdim#include "llvm/CodeGen/SelectionDAG.h"
17259698Sdim
18259698Sdimusing namespace llvm;
19259698Sdim
20259698SdimSystemZSelectionDAGInfo::
21259698SdimSystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
22259698Sdim  : TargetSelectionDAGInfo(TM) {
23259698Sdim}
24259698Sdim
25259698SdimSystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
26259698Sdim}
27259698Sdim
28259698Sdim// Decide whether it is best to use a loop or straight-line code for
29259698Sdim// a block operation of Size bytes with source address Src and destination
30259698Sdim// address Dest.  Sequence is the opcode to use for straight-line code
31259698Sdim// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
32259698Sdim// Return the chain for the completed operation.
33259698Sdimstatic SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
34259698Sdim                          unsigned Loop, SDValue Chain, SDValue Dst,
35259698Sdim                          SDValue Src, uint64_t Size) {
36259698Sdim  EVT PtrVT = Src.getValueType();
37259698Sdim  // The heuristic we use is to prefer loops for anything that would
38259698Sdim  // require 7 or more MVCs.  With these kinds of sizes there isn't
39259698Sdim  // much to choose between straight-line code and looping code,
40259698Sdim  // since the time will be dominated by the MVCs themselves.
41259698Sdim  // However, the loop has 4 or 5 instructions (depending on whether
42259698Sdim  // the base addresses can be proved equal), so there doesn't seem
43259698Sdim  // much point using a loop for 5 * 256 bytes or fewer.  Anything in
44259698Sdim  // the range (5 * 256, 6 * 256) will need another instruction after
45259698Sdim  // the loop, so it doesn't seem worth using a loop then either.
46259698Sdim  // The next value up, 6 * 256, can be implemented in the same
47259698Sdim  // number of straight-line MVCs as 6 * 256 - 1.
48259698Sdim  if (Size > 6 * 256)
49259698Sdim    return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
50259698Sdim                       DAG.getConstant(Size, PtrVT),
51259698Sdim                       DAG.getConstant(Size / 256, PtrVT));
52259698Sdim  return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
53259698Sdim                     DAG.getConstant(Size, PtrVT));
54259698Sdim}
55259698Sdim
56259698SdimSDValue SystemZSelectionDAGInfo::
57259698SdimEmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
58259698Sdim                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
59259698Sdim                        bool IsVolatile, bool AlwaysInline,
60259698Sdim                        MachinePointerInfo DstPtrInfo,
61259698Sdim                        MachinePointerInfo SrcPtrInfo) const {
62259698Sdim  if (IsVolatile)
63259698Sdim    return SDValue();
64259698Sdim
65259698Sdim  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size))
66259698Sdim    return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
67259698Sdim                      Chain, Dst, Src, CSize->getZExtValue());
68259698Sdim  return SDValue();
69259698Sdim}
70259698Sdim
71259698Sdim// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
72259698Sdim// Chain, Dst, ByteVal and Size.  These cases are expected to use
73259698Sdim// MVI, MVHHI, MVHI and MVGHI respectively.
74259698Sdimstatic SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
75259698Sdim                           SDValue Dst, uint64_t ByteVal, uint64_t Size,
76259698Sdim                           unsigned Align,
77259698Sdim                           MachinePointerInfo DstPtrInfo) {
78259698Sdim  uint64_t StoreVal = ByteVal;
79259698Sdim  for (unsigned I = 1; I < Size; ++I)
80259698Sdim    StoreVal |= ByteVal << (I * 8);
81259698Sdim  return DAG.getStore(Chain, DL,
82259698Sdim                      DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)),
83259698Sdim                      Dst, DstPtrInfo, false, false, Align);
84259698Sdim}
85259698Sdim
86259698SdimSDValue SystemZSelectionDAGInfo::
87259698SdimEmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
88259698Sdim                        SDValue Dst, SDValue Byte, SDValue Size,
89259698Sdim                        unsigned Align, bool IsVolatile,
90259698Sdim                        MachinePointerInfo DstPtrInfo) const {
91259698Sdim  EVT PtrVT = Dst.getValueType();
92259698Sdim
93259698Sdim  if (IsVolatile)
94259698Sdim    return SDValue();
95259698Sdim
96259698Sdim  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
97259698Sdim    uint64_t Bytes = CSize->getZExtValue();
98259698Sdim    if (Bytes == 0)
99259698Sdim      return SDValue();
100259698Sdim    if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) {
101259698Sdim      // Handle cases that can be done using at most two of
102259698Sdim      // MVI, MVHI, MVHHI and MVGHI.  The latter two can only be
103259698Sdim      // used if ByteVal is all zeros or all ones; in other casees,
104259698Sdim      // we can move at most 2 halfwords.
105259698Sdim      uint64_t ByteVal = CByte->getZExtValue();
106259698Sdim      if (ByteVal == 0 || ByteVal == 255 ?
107259698Sdim          Bytes <= 16 && CountPopulation_64(Bytes) <= 2 :
108259698Sdim          Bytes <= 4) {
109259698Sdim        unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
110259698Sdim        unsigned Size2 = Bytes - Size1;
111259698Sdim        SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1,
112259698Sdim                                     Align, DstPtrInfo);
113259698Sdim        if (Size2 == 0)
114259698Sdim          return Chain1;
115259698Sdim        Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
116259698Sdim                          DAG.getConstant(Size1, PtrVT));
117259698Sdim        DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
118259698Sdim        SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
119259698Sdim                                     std::min(Align, Size1), DstPtrInfo);
120259698Sdim        return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
121259698Sdim      }
122259698Sdim    } else {
123259698Sdim      // Handle one and two bytes using STC.
124259698Sdim      if (Bytes <= 2) {
125259698Sdim        SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
126259698Sdim                                      false, false, Align);
127259698Sdim        if (Bytes == 1)
128259698Sdim          return Chain1;
129259698Sdim        SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
130259698Sdim                                   DAG.getConstant(1, PtrVT));
131259698Sdim        SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
132259698Sdim                                      DstPtrInfo.getWithOffset(1),
133259698Sdim                                      false, false, 1);
134259698Sdim        return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
135259698Sdim      }
136259698Sdim    }
137259698Sdim    assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
138259698Sdim
139259698Sdim    // Handle the special case of a memset of 0, which can use XC.
140259698Sdim    ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte);
141259698Sdim    if (CByte && CByte->getZExtValue() == 0)
142259698Sdim      return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
143259698Sdim                        Chain, Dst, Dst, Bytes);
144259698Sdim
145259698Sdim    // Copy the byte to the first location and then use MVC to copy
146259698Sdim    // it to the rest.
147259698Sdim    Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
148259698Sdim                         false, false, Align);
149259698Sdim    SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
150259698Sdim                                   DAG.getConstant(1, PtrVT));
151259698Sdim    return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
152259698Sdim                      Chain, DstPlus1, Dst, Bytes - 1);
153259698Sdim  }
154259698Sdim  return SDValue();
155259698Sdim}
156259698Sdim
157259698Sdim// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
158259698Sdim// deciding whether to use a loop or straight-line code.
159259698Sdimstatic SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
160259698Sdim                       SDValue Src1, SDValue Src2, uint64_t Size) {
161259698Sdim  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
162259698Sdim  EVT PtrVT = Src1.getValueType();
163259698Sdim  // A two-CLC sequence is a clear win over a loop, not least because it
164259698Sdim  // needs only one branch.  A three-CLC sequence needs the same number
165259698Sdim  // of branches as a loop (i.e. 2), but is shorter.  That brings us to
166259698Sdim  // lengths greater than 768 bytes.  It seems relatively likely that
167259698Sdim  // a difference will be found within the first 768 bytes, so we just
168259698Sdim  // optimize for the smallest number of branch instructions, in order
169259698Sdim  // to avoid polluting the prediction buffer too much.  A loop only ever
170259698Sdim  // needs 2 branches, whereas a straight-line sequence would need 3 or more.
171259698Sdim  if (Size > 3 * 256)
172259698Sdim    return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
173259698Sdim                       DAG.getConstant(Size, PtrVT),
174259698Sdim                       DAG.getConstant(Size / 256, PtrVT));
175259698Sdim  return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
176259698Sdim                     DAG.getConstant(Size, PtrVT));
177259698Sdim}
178259698Sdim
179259698Sdim// Convert the current CC value into an integer that is 0 if CC == 0,
180259698Sdim// less than zero if CC == 1 and greater than zero if CC >= 2.
181259698Sdim// The sequence starts with IPM, which puts CC into bits 29 and 28
182259698Sdim// of an integer and clears bits 30 and 31.
183259698Sdimstatic SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
184259698Sdim  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
185259698Sdim  SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
186259698Sdim                            DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
187259698Sdim  SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
188259698Sdim                             DAG.getConstant(31, MVT::i32));
189259698Sdim  return ROTL;
190259698Sdim}
191259698Sdim
192259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
193259698SdimEmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
194259698Sdim                        SDValue Src1, SDValue Src2, SDValue Size,
195259698Sdim                        MachinePointerInfo Op1PtrInfo,
196259698Sdim                        MachinePointerInfo Op2PtrInfo) const {
197259698Sdim  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
198259698Sdim    uint64_t Bytes = CSize->getZExtValue();
199259698Sdim    assert(Bytes > 0 && "Caller should have handled 0-size case");
200259698Sdim    Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
201259698Sdim    SDValue Glue = Chain.getValue(1);
202259698Sdim    return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
203259698Sdim  }
204259698Sdim  return std::make_pair(SDValue(), SDValue());
205259698Sdim}
206259698Sdim
207259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
208259698SdimEmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
209259698Sdim                        SDValue Src, SDValue Char, SDValue Length,
210259698Sdim                        MachinePointerInfo SrcPtrInfo) const {
211259698Sdim  // Use SRST to find the character.  End is its address on success.
212259698Sdim  EVT PtrVT = Src.getValueType();
213259698Sdim  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
214259698Sdim  Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
215259698Sdim  Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
216259698Sdim  Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
217259698Sdim                     DAG.getConstant(255, MVT::i32));
218259698Sdim  SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
219259698Sdim  SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
220259698Sdim                            Limit, Src, Char);
221259698Sdim  Chain = End.getValue(1);
222259698Sdim  SDValue Glue = End.getValue(2);
223259698Sdim
224259698Sdim  // Now select between End and null, depending on whether the character
225259698Sdim  // was found.
226259698Sdim  SmallVector<SDValue, 5> Ops;
227259698Sdim  Ops.push_back(End);
228259698Sdim  Ops.push_back(DAG.getConstant(0, PtrVT));
229259698Sdim  Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
230259698Sdim  Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
231259698Sdim  Ops.push_back(Glue);
232259698Sdim  VTs = DAG.getVTList(PtrVT, MVT::Glue);
233259698Sdim  End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
234259698Sdim  return std::make_pair(End, Chain);
235259698Sdim}
236259698Sdim
237259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
238259698SdimEmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
239259698Sdim                        SDValue Dest, SDValue Src,
240259698Sdim                        MachinePointerInfo DestPtrInfo,
241259698Sdim                        MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
242259698Sdim  SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
243259698Sdim  SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
244259698Sdim                                DAG.getConstant(0, MVT::i32));
245259698Sdim  return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
246259698Sdim}
247259698Sdim
248259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
249259698SdimEmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
250259698Sdim                        SDValue Src1, SDValue Src2,
251259698Sdim                        MachinePointerInfo Op1PtrInfo,
252259698Sdim                        MachinePointerInfo Op2PtrInfo) const {
253259698Sdim  SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
254259698Sdim  SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
255259698Sdim                               DAG.getConstant(0, MVT::i32));
256259698Sdim  Chain = Unused.getValue(1);
257259698Sdim  SDValue Glue = Chain.getValue(2);
258259698Sdim  return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
259259698Sdim}
260259698Sdim
261259698Sdim// Search from Src for a null character, stopping once Src reaches Limit.
262259698Sdim// Return a pair of values, the first being the number of nonnull characters
263259698Sdim// and the second being the out chain.
264259698Sdim//
265259698Sdim// This can be used for strlen by setting Limit to 0.
266259698Sdimstatic std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
267259698Sdim                                                    SDValue Chain, SDValue Src,
268259698Sdim                                                    SDValue Limit) {
269259698Sdim  EVT PtrVT = Src.getValueType();
270259698Sdim  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
271259698Sdim  SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
272259698Sdim                            Limit, Src, DAG.getConstant(0, MVT::i32));
273259698Sdim  Chain = End.getValue(1);
274259698Sdim  SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
275259698Sdim  return std::make_pair(Len, Chain);
276259698Sdim}
277259698Sdim
278259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
279259698SdimEmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
280259698Sdim                        SDValue Src, MachinePointerInfo SrcPtrInfo) const {
281259698Sdim  EVT PtrVT = Src.getValueType();
282259698Sdim  return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT));
283259698Sdim}
284259698Sdim
285259698Sdimstd::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
286259698SdimEmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
287259698Sdim                         SDValue Src, SDValue MaxLength,
288259698Sdim                         MachinePointerInfo SrcPtrInfo) const {
289259698Sdim  EVT PtrVT = Src.getValueType();
290259698Sdim  MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
291259698Sdim  SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
292259698Sdim  return getBoundedStrlen(DAG, DL, Chain, Src, Limit);
293259698Sdim}
294