//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that SystemZ uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H

#include "SystemZ.h"
#include "SystemZInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {
namespace SystemZISD {
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  // Return with a glue operand.  Operand 0 is the chain operand.
  RET_GLUE,

  // Calls a function.  Operand 0 is the chain operand and operand 1
  // is the target address.  The arguments start at operand 2.
  // There is an optional glue operand at the end.
  CALL,
  SIBCALL,
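  // As an illustrative sketch of the operand list described above:
  //   CALL(INCHAIN, target, arg0, arg1, ..., [glue])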

  // TLS calls.  Like regular calls, except operand 1 is the TLS symbol.
  // (The call target is implicitly __tls_get_offset.)
  TLS_GDCALL,
  TLS_LDCALL,

  // Wraps a TargetGlobalAddress that should be loaded using PC-relative
  // accesses (LARL).  Operand 0 is the address.
  PCREL_WRAPPER,

  // Used in cases where an offset is applied to a TargetGlobalAddress.
  // Operand 0 is the full TargetGlobalAddress and operand 1 is a
  // PCREL_WRAPPER for an anchor point.  This is used so that we can
  // cheaply refer to either the full address or the anchor point
  // as a register base.
  PCREL_OFFSET,

  // Integer comparisons.  There are three operands: the two values
  // to compare, and an integer of type SystemZICMP.
  ICMP,
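  // For example, ICMP(a, b, SystemZICMP::SignedOnly) compares a and b as
  // signed values (see the SystemZICMP namespace below).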

  // Floating-point comparisons.  The two operands are the values to compare.
  FCMP,

  // Test under mask.  The first operand is ANDed with the second operand
  // and the condition codes are set on the result.  The third operand is
  // a boolean that is true if the condition codes need to distinguish
  // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
  // register forms do but the memory forms don't).
  TM,

  // Branches if a condition is true.  Operand 0 is the chain operand;
  // operand 1 is the 4-bit condition-code mask, with bit N in
  // big-endian order meaning "branch if CC=N"; operand 2 is the
  // target block and operand 3 is the flag operand.
  BR_CCMASK,
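  // For example, a mask of 0b1010 (bits 0 and 2 set, counting from the MSB)
  // means "branch if CC=0 or CC=2".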

  // Selects between operand 0 and operand 1.  Operand 2 is the
  // mask of condition-code values for which operand 0 should be
  // chosen over operand 1; it has the same form as BR_CCMASK.
  // Operand 3 is the flag operand.
  SELECT_CCMASK,

  // Evaluates to the gap between the stack pointer and the
  // base of the dynamically-allocatable area.
  ADJDYNALLOC,

  // For allocating stack space when using the stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Count number of bits set in operand 0 per byte.
  POPCNT,

  // Wrappers around the ISD opcodes of the same name.  The output is GR128.
  // Input operands may be GR64 or GR32, depending on the instruction.
  SMUL_LOHI,
  UMUL_LOHI,
  SDIVREM,
  UDIVREM,

  // Add/subtract with overflow/carry.  These have the same operands as
  // the corresponding standard operations, except with the carry flag
  // replaced by a condition code value.
  SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY,

  // Set the condition code from a boolean value in operand 0.
  // Operand 1 is a mask of all condition-code values that may result from
  // this operation; operand 2 is a mask of the condition-code values that
  // may result if the boolean is true.
  // Note that this operation is always optimized away; we will never
  // generate any code for it.
  GET_CCMASK,

  // Use a series of MVCs to copy bytes from one memory location to another.
  // The operands are:
  // - the target address
  // - the source address
  // - the constant length
  //
  // This isn't a memory opcode because we'd need to attach two
  // MachineMemOperands rather than one.
  MVC,

  // Similar to MVC, but for logic operations (AND, OR, XOR).
  NC,
  OC,
  XC,

  // Use CLC to compare two blocks of memory, with the same comments
  // as for MVC.
  CLC,

  // Use MVC to set a block of memory after storing the first byte.
  MEMSET_MVC,

  // Use an MVST-based sequence to implement stpcpy().
  STPCPY,

  // Use a CLST-based sequence to implement strcmp().  The two input operands
  // are the addresses of the strings to compare.
  STRCMP,

  // Use an SRST-based sequence to search a block of memory.  The first
  // operand is the end address, the second is the start, and the third
  // is the character to search for.  CC is set to 1 on success and 2
  // on failure.
  SEARCH_STRING,

  // Store the CC value in bits 29 and 28 of an integer.
  IPM,
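  // For example, CC=1 yields the integer value 0x10000000 (1 << 28).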

  // Transaction begin.  The first operand is the chain, the second
  // the TDB pointer, and the third the immediate control field.
  // Returns CC value and chain.
  TBEGIN,
  TBEGIN_NOFLOAT,

  // Transaction end.  Just the chain operand.  Returns CC value and chain.
  TEND,

  // Create a vector constant by filling byte N of the result with bit
  // 15-N of the single operand.
  BYTE_MASK,
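  // For example, an operand of 0x8001 produces a vector in which byte 0
  // (from bit 15) and byte 15 (from bit 0) are 0xff and all other bytes
  // are zero.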

  // Create a vector constant by replicating an element-sized RISBG-style mask.
  // The first operand specifies the starting set bit and the second operand
  // specifies the ending set bit.  Both operands count from the MSB of the
  // element.
  ROTATE_MASK,
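  // For example, with i8 elements, operands (2, 5) set bits 2 through 5 of
  // each element, producing 0b00111100 in every byte of the result.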

  // Replicate a GPR scalar value into all elements of a vector.
  REPLICATE,

  // Create a vector from two i64 GPRs.
  JOIN_DWORDS,

  // Replicate one element of a vector into all elements.  The first operand
  // is the vector and the second is the index of the element to replicate.
  SPLAT,

  // Interleave elements from the high half of operand 0 and the high half
  // of operand 1.
  MERGE_HIGH,

  // Likewise for the low halves.
  MERGE_LOW,

  // Concatenate the vectors in the first two operands, shift them left
  // by the third operand, and take the first half of the result.
  SHL_DOUBLE,

  // Take one element of the first v2i64 operand and one element of the
  // second v2i64 operand and concatenate them to form a v2i64 result.
  // The third operand is a 4-bit value of the form 0A0B, where A and B
  // are the element selectors for the first and second operands
  // respectively.
  PERMUTE_DWORDS,
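  // For example, a third operand of 0b0100 (A=1, B=0) takes element 1 of
  // the first operand and element 0 of the second.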

  // Perform a general vector permute on vector operands 0 and 1.
  // Each byte of operand 2 controls the corresponding byte of the result,
  // in the same way as a byte-level VECTOR_SHUFFLE mask.
  PERMUTE,

  // Pack vector operands 0 and 1 into a single vector with half-sized elements.
  PACK,

  // Likewise, but saturate the result and set CC.  PACKS_CC does signed
  // saturation and PACKLS_CC does unsigned saturation.
  PACKS_CC,
  PACKLS_CC,

  // Unpack the first half of vector operand 0 into double-sized elements.
  // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
  UNPACK_HIGH,
  UNPACKL_HIGH,

  // Likewise for the second half.
  UNPACK_LOW,
  UNPACKL_LOW,

  // Shift/rotate each element of vector operand 0 by the number of bits
  // specified by scalar operand 1.
  VSHL_BY_SCALAR,
  VSRL_BY_SCALAR,
  VSRA_BY_SCALAR,
  VROTL_BY_SCALAR,

  // For each element of the output type, sum across all sub-elements of
  // operand 0 belonging to the corresponding element, and add in the
  // rightmost sub-element of the corresponding element of operand 1.
  VSUM,
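  // For example, with v16i8 operands and a v4i32 result, element 0 of the
  // result is the sum of bytes 0-3 of operand 0 plus byte 3 of operand 1.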

  // Compute carry/borrow indication for add/subtract.
  VACC, VSCBI,
  // Add/subtract with carry/borrow.
  VAC, VSBI,
  // Compute carry/borrow indication for add/subtract with carry/borrow.
  VACCC, VSBCBI,

  // Compare integer vector operands 0 and 1 to produce the usual 0/-1
  // vector result.  VICMPE is for equality, VICMPH for "signed greater than"
  // and VICMPHL for "unsigned greater than".
  VICMPE,
  VICMPH,
  VICMPHL,

  // Likewise, but also set the condition codes on the result.
  VICMPES,
  VICMPHS,
  VICMPHLS,

  // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
  // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
  // greater than" and VFCMPHE for "ordered and greater than or equal to".
  VFCMPE,
  VFCMPH,
  VFCMPHE,

  // Likewise, but also set the condition codes on the result.
  VFCMPES,
  VFCMPHS,
  VFCMPHES,

  // Test floating-point data class for vectors.
  VFTCI,

  // Extend the even f32 elements of vector operand 0 to produce a vector
  // of f64 elements.
  VEXTEND,

  // Round the f64 elements of vector operand 0 to f32s and store them in the
  // even elements of the result.
  VROUND,

  // AND the two vector operands together and set CC based on the result.
  VTM,

  // i128 high integer comparisons.
  SCMP128HI,
  UCMP128HI,

  // String operations that set CC as a side-effect.
  VFAE_CC,
  VFAEZ_CC,
  VFEE_CC,
  VFEEZ_CC,
  VFENE_CC,
  VFENEZ_CC,
  VISTR_CC,
  VSTRC_CC,
  VSTRCZ_CC,
  VSTRS_CC,
  VSTRSZ_CC,

  // Test Data Class.
  //
  // Operand 0: the value to test
  // Operand 1: the bit mask
  TDC,

  // z/OS XPLINK ADA Entry
  // Wraps a TargetGlobalAddress that should be loaded from a function's
  // AssociatedData Area (ADA).  The ADA is passed to the function by the
  // caller in R5, the register the XPLINK ABI designates for this purpose.
  // Operand 0: the GlobalValue/External Symbol
  // Operand 1: the ADA register
  // Operand 2: the offset (0 for the first and 8 for the second element in the
  //            function descriptor)
  ADA_ENTRY,

  // Strict variants of scalar floating-point comparisons.
  // Quiet and signaling versions.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Strict variants of vector floating-point comparisons.
  // Quiet and signaling versions.
  STRICT_VFCMPE,
  STRICT_VFCMPH,
  STRICT_VFCMPHE,
  STRICT_VFCMPES,
  STRICT_VFCMPHS,
  STRICT_VFCMPHES,

  // Strict variants of VEXTEND and VROUND.
  STRICT_VEXTEND,
  STRICT_VROUND,

  // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
  // ATOMIC_LOAD_<op>.
  //
  // Operand 0: the address of the containing 32-bit-aligned field
  // Operand 1: the second operand of <op>, in the high bits of an i32
  //            for everything except ATOMIC_SWAPW
  // Operand 2: how many bits to rotate the i32 left to bring the first
  //            operand into the high bits
  // Operand 3: the negative of operand 2, for rotating the other way
  // Operand 4: the width of the field in bits (8 or 16)
  ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
  ATOMIC_LOADW_ADD,
  ATOMIC_LOADW_SUB,
  ATOMIC_LOADW_AND,
  ATOMIC_LOADW_OR,
  ATOMIC_LOADW_XOR,
  ATOMIC_LOADW_NAND,
  ATOMIC_LOADW_MIN,
  ATOMIC_LOADW_MAX,
  ATOMIC_LOADW_UMIN,
  ATOMIC_LOADW_UMAX,

  // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
  //
  // Operand 0: the address of the containing 32-bit-aligned field
  // Operand 1: the compare value, in the low bits of an i32
  // Operand 2: the swap value, in the low bits of an i32
  // Operand 3: how many bits to rotate the i32 left to bring the first
  //            operand into the high bits
  // Operand 4: the negative of operand 3, for rotating the other way
  // Operand 5: the width of the field in bits (8 or 16)
  ATOMIC_CMP_SWAPW,

  // Atomic compare-and-swap returning CC value.
  // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
  ATOMIC_CMP_SWAP,

  // 128-bit atomic load.
  // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr)
  ATOMIC_LOAD_128,

  // 128-bit atomic store.
  // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr)
  ATOMIC_STORE_128,

  // 128-bit atomic compare-and-swap.
  // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP_128(INCHAIN, ptr, cmp, swap)
  ATOMIC_CMP_SWAP_128,

  // Byte swapping load/store.  Same operands as regular load/store.
  LRV, STRV,

  // Element swapping load/store.  Same operands as regular load/store.
  VLER, VSTER,

  // Prefetch from the second operand using the 4-bit control code in
  // the first operand.  The code is 1 for a load prefetch and 2 for
  // a store prefetch.
  PREFETCH
};

// Return true if OPCODE is some kind of PC-relative address.
inline bool isPCREL(unsigned Opcode) {
  return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
}
} // end namespace SystemZISD

namespace SystemZICMP {
// Describes whether an integer comparison needs to be signed or unsigned,
// or whether either type is OK.
enum {
  Any,
  UnsignedOnly,
  SignedOnly
};
} // end namespace SystemZICMP

class SystemZSubtarget;

class SystemZTargetLowering : public TargetLowering {
public:
  explicit SystemZTargetLowering(const TargetMachine &TM,
                                 const SystemZSubtarget &STI);

  bool useSoftFloat() const override;

  // Override TargetLowering.
  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
    return MVT::i32;
  }
  MVT getVectorIdxTy(const DataLayout &DL) const override {
    // Only the lower 12 bits of an element index are used, so we don't
    // want to clobber the upper 32 bits of a GPR unnecessarily.
    return MVT::i32;
  }
  TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
    const override {
    // Widen subvectors to the full width rather than promoting integer
    // elements.  This is better because:
    //
    // (a) it means that we can handle the ABI for passing and returning
    //     sub-128 vectors without having to handle them as legal types.
    //
    // (b) we don't have instructions to extend on load and truncate on store,
    //     so promoting the integers is less efficient.
    //
    // (c) there are no multiplication instructions for the widest integer
    //     type (v2i64).
    if (VT.getScalarSizeInBits() % 8 == 0)
      return TypeWidenVector;
    return TargetLoweringBase::getPreferredVectorAction(VT);
  }
  unsigned
  getNumRegisters(LLVMContext &Context, EVT VT,
                  std::optional<MVT> RegisterVT) const override {
    // i128 inline assembly operand.
    if (VT == MVT::i128 && RegisterVT && *RegisterVT == MVT::Untyped)
      return 1;
    return TargetLowering::getNumRegisters(Context, VT);
  }
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override {
    // 128-bit single-element vector types are passed like other vectors,
    // not like their element type.
    if (VT.isVector() && VT.getSizeInBits() == 128 &&
        VT.getVectorNumElements() == 1)
      return MVT::v16i8;
    return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
  }
  bool isCheapToSpeculateCtlz(Type *) const override { return true; }
  bool isCheapToSpeculateCttz(Type *) const override { return true; }
  bool preferZeroCompareBranch() const override { return true; }
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
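    // Only fold when the mask fits in 16 bits, matching the immediate
    // width of the TEST UNDER MASK instructions.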
    ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
    return Mask && Mask->getValue().isIntN(16);
  }
  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                         EVT) const override;
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override {
    // Do not shrink 64-bit FP constpool entries since LDEB is slower than
    // LD, and having the full constant in memory enables reg/mem opcodes.
    return VT != MVT::f64;
  }
  bool hasInlineStackProbe(const MachineFunction &MF) const override;
  AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast) const override;
  bool
  findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
                           const MemOp &Op, unsigned DstAS, unsigned SrcAS,
                           const AttributeList &FuncAttributes) const override;
  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;
  bool isTruncateFree(Type *, Type *) const override;
  bool isTruncateFree(EVT, EVT) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Form add and sub with overflow intrinsics regardless of any extra
    // users of the math result.
    return VT == MVT::i32 || VT == MVT::i64;
  }

  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  const char *getTargetNodeName(unsigned Opcode) const override;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
  TargetLowering::ConstraintType
  getConstraintType(StringRef Constraint) const override;
  TargetLowering::ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;
  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode.size() == 1) {
      switch (ConstraintCode[0]) {
      default:
        break;
      case 'o':
        return InlineAsm::ConstraintCode::o;
      case 'Q':
        return InlineAsm::ConstraintCode::Q;
      case 'R':
        return InlineAsm::ConstraintCode::R;
      case 'S':
        return InlineAsm::ConstraintCode::S;
      case 'T':
        return InlineAsm::ConstraintCode::T;
      }
    } else if (ConstraintCode.size() == 2 && ConstraintCode[0] == 'Z') {
      switch (ConstraintCode[1]) {
      default:
        break;
      case 'Q':
        return InlineAsm::ConstraintCode::ZQ;
      case 'R':
        return InlineAsm::ConstraintCode::ZR;
      case 'S':
        return InlineAsm::ConstraintCode::ZS;
      case 'T':
        return InlineAsm::ConstraintCode::ZT;
      }
    }
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  /// Override to support customized stack guard loading.
  bool useLoadStackGuardNode() const override {
    return true;
  }
  void insertSSPDeclarations(Module &M) const override {
  }

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  bool allowTruncateForTailCall(Type *, Type *) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
      const override;
  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  std::pair<SDValue, SDValue>
  makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName,
                   EVT RetVT, ArrayRef<SDValue> Ops, CallingConv::ID CallConv,
                   bool IsSigned, SDLoc DL, bool DoesNotReturn,
                   bool IsReturnValueUsed) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  /// Determine the number of bits in the operation that are sign bits.
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, unsigned Depth) const override;

  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::ANY_EXTEND;
  }
  ISD::NodeType getExtendForAtomicCmpSwapArg() const override {
    return ISD::ZERO_EXTEND;
  }

  bool supportSwiftError() const override {
    return true;
  }

  unsigned getStackProbeSize(const MachineFunction &MF) const;

private:
  const SystemZSubtarget &Subtarget;

  // Implement LowerOperation for individual opcodes.
  SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                       const SDLoc &DL, EVT VT,
                       SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const;
  SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL,
                           EVT VT, ISD::CondCode CC,
                           SDValue CmpOp0, SDValue CmpOp1,
                           SDValue Chain = SDValue(),
                           bool IsSignaling = false) const;
  SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG,
                             bool IsSignaling) const;
  SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
                             SelectionDAG &DAG) const;
  SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
                            SelectionDAG &DAG, unsigned Opcode,
                            SDValue GOTOffset) const;
  SDValue lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(BlockAddressSDNode *Node,
                            SelectionDAG &DAG) const;
  SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART_ELF(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART_XPLINK(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerXALUO(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
                              unsigned Opcode) const;
  SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  bool isVectorElementLoad(SDValue Op) const;
  SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                      SmallVectorImpl<SDValue> &Elems) const;
  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
  SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  bool canTreatAsByteVector(EVT VT) const;
  SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
                         unsigned Index, DAGCombinerInfo &DCI,
                         bool Force) const;
  SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
                                 DAGCombinerInfo &DCI) const;
  SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
  bool canLoadStoreByteSwapped(EVT VT) const;
  SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue combineINTRINSIC(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue unwrapAddress(SDValue N) const override;

  // If the last instruction before MBBI in MBB was some form of COMPARE,
  // try to replace it with a COMPARE AND BRANCH just before MBBI.
  // CCMask and Target are the BRC-like operands for the branch.
  // Return true if the change was made.
  bool convertPrevCompareToBranch(MachineBasicBlock *MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  unsigned CCMask,
                                  MachineBasicBlock *Target) const;

  // Implement EmitInstrWithCustomInserter for individual operation types.
  MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
                                   unsigned StoreOpcode, unsigned STOCOpcode,
                                   bool Invert) const;
  MachineBasicBlock *emitICmp128Hi(MachineInstr &MI, MachineBasicBlock *BB,
                                   bool Unsigned) const;
  MachineBasicBlock *emitPair128(MachineInstr &MI,
                                 MachineBasicBlock *MBB) const;
  MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB,
                                bool ClearEven) const;
  MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          unsigned BinOpcode,
                                          bool Invert = false) const;
  MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned CompareOpcode,
                                          unsigned KeepOldMask) const;
  MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;
  MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB,
                                       unsigned Opcode,
                                       bool IsMemset = false) const;
  MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB,
                                       unsigned Opcode) const;
  MachineBasicBlock *emitTransactionBegin(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode, bool NoFloat) const;
  MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                         MachineBasicBlock *MBB,
                                         unsigned Opcode) const;
  MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  SDValue getBackchainAddress(SDValue SP, SelectionDAG &DAG) const;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;
  const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
};

struct SystemZVectorConstantInfo {
private:
  APInt IntBits;             // The 128 bits as an integer.
  APInt SplatBits;           // Smallest splat value.
  APInt SplatUndef;          // Bits corresponding to undef operands of the BVN.
  unsigned SplatBitSize = 0;
  bool isFP128 = false;
public:
  unsigned Opcode = 0;
  SmallVector<unsigned, 2> OpVals;
  MVT VecVT;
  SystemZVectorConstantInfo(APInt IntImm);
  SystemZVectorConstantInfo(APFloat FPImm)
      : SystemZVectorConstantInfo(FPImm.bitcastToAPInt()) {
    isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
  }
  SystemZVectorConstantInfo(BuildVectorSDNode *BVN);
  bool isVectorConstantLegal(const SystemZSubtarget &Subtarget);
};
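
// An illustrative sketch of typical usage (variable names assumed, not part
// of this header's contract):
//
//   SystemZVectorConstantInfo VCI(SomeAPFloatImm);
//   if (VCI.isVectorConstantLegal(Subtarget)) {
//     // VCI.Opcode, VCI.OpVals and VCI.VecVT describe a vector instruction
//     // that can materialize the constant.
//   }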

} // end namespace llvm

#endif