1//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that SystemZ uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
16#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
17
18#include "SystemZ.h"
19#include "llvm/CodeGen/MachineBasicBlock.h"
20#include "llvm/CodeGen/SelectionDAG.h"
21#include "llvm/Target/TargetLowering.h"
22
23namespace llvm {
24namespace SystemZISD {
// SystemZ-specific SelectionDAG node opcodes.  The enumerator order is
// significant: target memory opcodes must start at FIRST_TARGET_MEMORY_OPCODE.
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  // Return with a flag operand.  Operand 0 is the chain operand.
  RET_FLAG,

  // Calls a function.  Operand 0 is the chain operand and operand 1
  // is the target address.  The arguments start at operand 2.
  // There is an optional glue operand at the end.
  CALL,
  SIBCALL,

  // TLS calls.  Like regular calls, except operand 1 is the TLS symbol.
  // (The call target is implicitly __tls_get_offset.)
  TLS_GDCALL,
  TLS_LDCALL,

  // Wraps a TargetGlobalAddress that should be loaded using PC-relative
  // accesses (LARL).  Operand 0 is the address.
  PCREL_WRAPPER,

  // Used in cases where an offset is applied to a TargetGlobalAddress.
  // Operand 0 is the full TargetGlobalAddress and operand 1 is a
  // PCREL_WRAPPER for an anchor point.  This is used so that we can
  // cheaply refer to either the full address or the anchor point
  // as a register base.
  PCREL_OFFSET,

  // Integer absolute value.
  IABS,

  // Integer comparisons.  There are three operands: the two values
  // to compare, and an integer of type SystemZICMP.
  ICMP,

  // Floating-point comparisons.  The two operands are the values to compare.
  FCMP,

  // Test under mask.  The first operand is ANDed with the second operand
  // and the condition codes are set on the result.  The third operand is
  // a boolean that is true if the condition codes need to distinguish
  // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
  // register forms do but the memory forms don't).
  TM,

  // Branches if a condition is true.  Operand 0 is the chain operand;
  // operand 1 is the 4-bit condition-code mask, with bit N in
  // big-endian order meaning "branch if CC=N"; operand 2 is the
  // target block and operand 3 is the flag operand.
  BR_CCMASK,

  // Selects between operand 0 and operand 1.  Operand 2 is the
  // mask of condition-code values for which operand 0 should be
  // chosen over operand 1; it has the same form as BR_CCMASK.
  // Operand 3 is the flag operand.
  SELECT_CCMASK,

  // Evaluates to the gap between the stack pointer and the
  // base of the dynamically-allocatable area.
  ADJDYNALLOC,

  // Extracts the value of a 32-bit access register.  Operand 0 is
  // the number of the register.
  EXTRACT_ACCESS,

  // Count number of bits set in operand 0 per byte.
  POPCNT,

  // Wrappers around the ISD opcodes of the same name.  The output and
  // first input operands are GR128s.  The trailing numbers are the
  // widths of the second operand in bits.
  UMUL_LOHI64,
  SDIVREM32,
  SDIVREM64,
  UDIVREM32,
  UDIVREM64,

  // Use a series of MVCs to copy bytes from one memory location to another.
  // The operands are:
  // - the target address
  // - the source address
  // - the constant length
  //
  // This isn't a memory opcode because we'd need to attach two
  // MachineMemOperands rather than one.
  MVC,

  // Like MVC, but implemented as a loop that handles X*256 bytes
  // followed by straight-line code to handle the rest (if any).
  // The value of X is passed as an additional operand.
  MVC_LOOP,

  // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
  NC,
  NC_LOOP,
  OC,
  OC_LOOP,
  XC,
  XC_LOOP,

  // Use CLC to compare two blocks of memory, with the same comments
  // as for MVC and MVC_LOOP.
  CLC,
  CLC_LOOP,

  // Use an MVST-based sequence to implement stpcpy().
  STPCPY,

  // Use a CLST-based sequence to implement strcmp().  The two input operands
  // are the addresses of the strings to compare.
  STRCMP,

  // Use an SRST-based sequence to search a block of memory.  The first
  // operand is the end address, the second is the start, and the third
  // is the character to search for.  CC is set to 1 on success and 2
  // on failure.
  SEARCH_STRING,

  // Store the CC value in bits 29 and 28 of an integer.
  IPM,

  // Perform a serialization operation.  (BCR 15,0 or BCR 14,0.)
  SERIALIZE,

  // Transaction begin.  The first operand is the chain, the second
  // the TDB pointer, and the third the immediate control field.
  // Returns chain and glue.
  TBEGIN,
  TBEGIN_NOFLOAT,

  // Transaction end.  Just the chain operand.  Returns chain and glue.
  TEND,

  // Create a vector constant by filling byte N of the result with bit
  // 15-N of the single operand.
  BYTE_MASK,

  // Create a vector constant by replicating an element-sized RISBG-style mask.
  // The first operand specifies the starting set bit and the second operand
  // specifies the ending set bit.  Both operands count from the MSB of the
  // element.
  ROTATE_MASK,

  // Replicate a GPR scalar value into all elements of a vector.
  REPLICATE,

  // Create a vector from two i64 GPRs.
  JOIN_DWORDS,

  // Replicate one element of a vector into all elements.  The first operand
  // is the vector and the second is the index of the element to replicate.
  SPLAT,

  // Interleave elements from the high half of operand 0 and the high half
  // of operand 1.
  MERGE_HIGH,

  // Likewise for the low halves.
  MERGE_LOW,

  // Concatenate the vectors in the first two operands, shift them left
  // by the third operand, and take the first half of the result.
  SHL_DOUBLE,

  // Take one element of the first v2i64 operand and the one element of
  // the second v2i64 operand and concatenate them to form a v2i64 result.
  // The third operand is a 4-bit value of the form 0A0B, where A and B
  // are the element selectors for the first operand and second operands
  // respectively.
  PERMUTE_DWORDS,

  // Perform a general vector permute on vector operands 0 and 1.
  // Each byte of operand 2 controls the corresponding byte of the result,
  // in the same way as a byte-level VECTOR_SHUFFLE mask.
  PERMUTE,

  // Pack vector operands 0 and 1 into a single vector with half-sized elements.
  PACK,

  // Likewise, but saturate the result and set CC.  PACKS_CC does signed
  // saturation and PACKLS_CC does unsigned saturation.
  PACKS_CC,
  PACKLS_CC,

  // Unpack the first half of vector operand 0 into double-sized elements.
  // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
  UNPACK_HIGH,
  UNPACKL_HIGH,

  // Likewise for the second half.
  UNPACK_LOW,
  UNPACKL_LOW,

  // Shift each element of vector operand 0 by the number of bits specified
  // by scalar operand 1.
  VSHL_BY_SCALAR,
  VSRL_BY_SCALAR,
  VSRA_BY_SCALAR,

  // For each element of the output type, sum across all sub-elements of
  // operand 0 belonging to the corresponding element, and add in the
  // rightmost sub-element of the corresponding element of operand 1.
  VSUM,

  // Compare integer vector operands 0 and 1 to produce the usual 0/-1
  // vector result.  VICMPE is for equality, VICMPH for "signed greater than"
  // and VICMPHL for "unsigned greater than".
  VICMPE,
  VICMPH,
  VICMPHL,

  // Likewise, but also set the condition codes on the result.
  VICMPES,
  VICMPHS,
  VICMPHLS,

  // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
  // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
  // greater than" and VFCMPHE for "ordered and greater than or equal to".
  VFCMPE,
  VFCMPH,
  VFCMPHE,

  // Likewise, but also set the condition codes on the result.
  VFCMPES,
  VFCMPHS,
  VFCMPHES,

  // Test floating-point data class for vectors.
  VFTCI,

  // Extend the even f32 elements of vector operand 0 to produce a vector
  // of f64 elements.
  VEXTEND,

  // Round the f64 elements of vector operand 0 to f32s and store them in the
  // even elements of the result.
  VROUND,

  // AND the two vector operands together and set CC based on the result.
  VTM,

  // String operations that set CC as a side-effect.
  VFAE_CC,
  VFAEZ_CC,
  VFEE_CC,
  VFEEZ_CC,
  VFENE_CC,
  VFENEZ_CC,
  VISTR_CC,
  VSTRC_CC,
  VSTRCZ_CC,

  // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
  // ATOMIC_LOAD_<op>.
  //
  // Operand 0: the address of the containing 32-bit-aligned field
  // Operand 1: the second operand of <op>, in the high bits of an i32
  //            for everything except ATOMIC_SWAPW
  // Operand 2: how many bits to rotate the i32 left to bring the first
  //            operand into the high bits
  // Operand 3: the negative of operand 2, for rotating the other way
  // Operand 4: the width of the field in bits (8 or 16)
  ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
  ATOMIC_LOADW_ADD,
  ATOMIC_LOADW_SUB,
  ATOMIC_LOADW_AND,
  ATOMIC_LOADW_OR,
  ATOMIC_LOADW_XOR,
  ATOMIC_LOADW_NAND,
  ATOMIC_LOADW_MIN,
  ATOMIC_LOADW_MAX,
  ATOMIC_LOADW_UMIN,
  ATOMIC_LOADW_UMAX,

  // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
  //
  // Operand 0: the address of the containing 32-bit-aligned field
  // Operand 1: the compare value, in the low bits of an i32
  // Operand 2: the swap value, in the low bits of an i32
  // Operand 3: how many bits to rotate the i32 left to bring the first
  //            operand into the high bits
  // Operand 4: the negative of operand 3, for rotating the other way
  // Operand 5: the width of the field in bits (8 or 16)
  ATOMIC_CMP_SWAPW,

  // Prefetch from the second operand using the 4-bit control code in
  // the first operand.  The code is 1 for a load prefetch and 2 for
  // a store prefetch.
  PREFETCH
};
316
317// Return true if OPCODE is some kind of PC-relative address.
318inline bool isPCREL(unsigned Opcode) {
319  return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
320}
321} // end namespace SystemZISD
322
namespace SystemZICMP {
// Describes whether an integer comparison needs to be signed or unsigned,
// or whether either type is OK.  Passed as the third operand of a
// SystemZISD::ICMP node.
enum {
  Any,           // Either a signed or an unsigned comparison is acceptable.
  UnsignedOnly,  // The comparison must be unsigned.
  SignedOnly     // The comparison must be signed.
};
} // end namespace SystemZICMP
332
333class SystemZSubtarget;
334class SystemZTargetMachine;
335
// SystemZ implementation of the TargetLowering hooks: describes how LLVM IR
// operations are legalized and lowered into the SystemZISD nodes declared
// above, and provides the custom MachineInstr expansions used after
// instruction selection.
class SystemZTargetLowering : public TargetLowering {
public:
  explicit SystemZTargetLowering(const TargetMachine &TM,
                                 const SystemZSubtarget &STI);

  // Override TargetLowering.
  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
    // All scalar shift amounts are represented as i32.
    return MVT::i32;
  }
  MVT getVectorIdxTy(const DataLayout &DL) const override {
    // Only the lower 12 bits of an element index are used, so we don't
    // want to clobber the upper 32 bits of a GPR unnecessarily.
    return MVT::i32;
  }
  TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
    const override {
    // Widen subvectors to the full width rather than promoting integer
    // elements.  This is better because:
    //
    // (a) it means that we can handle the ABI for passing and returning
    //     sub-128 vectors without having to handle them as legal types.
    //
    // (b) we don't have instructions to extend on load and truncate on store,
    //     so promoting the integers is less efficient.
    //
    // (c) there are no multiplication instructions for the widest integer
    //     type (v2i64).
    if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
      return TypeWidenVector;
    return TargetLoweringBase::getPreferredVectorAction(VT);
  }
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                         EVT) const override;
  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                      unsigned Align,
                                      bool *Fast) const override;
  bool isTruncateFree(Type *, Type *) const override;
  bool isTruncateFree(EVT, EVT) const override;
  const char *getTargetNodeName(unsigned Opcode) const override;
  // Inline-asm constraint handling.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
  TargetLowering::ConstraintType
  getConstraintType(StringRef Constraint) const override;
  TargetLowering::ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;
  void LowerAsmOperandForConstraint(SDValue Op,
                                    std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  // Map the single-letter memory constraint codes Q, R, S and T onto the
  // generic InlineAsm constraint enumerators; defer everything else to the
  // base class.
  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode.size() == 1) {
      switch(ConstraintCode[0]) {
      default:
        break;
      case 'Q':
        return InlineAsm::Constraint_Q;
      case 'R':
        return InlineAsm::Constraint_R;
      case 'S':
        return InlineAsm::Constraint_S;
      case 'T':
        return InlineAsm::Constraint_T;
      }
    }
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  unsigned
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    return SystemZ::R6D;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  unsigned
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    return SystemZ::R7D;
  }

  MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const
    override;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  bool allowTruncateForTailCall(Type *, Type *) const override;
  bool mayBeEmittedAsTailCall(CallInst *CI) const override;
  // Calling-convention lowering.
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SDLoc DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      SDLoc DL, SelectionDAG &DAG) const override;
  SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
                                      SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

private:
  // Subtarget that this lowering was created for; consulted for feature
  // availability when lowering.
  const SystemZSubtarget &Subtarget;

  // Implement LowerOperation for individual opcodes.
  SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
                             SelectionDAG &DAG) const;
  SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
                            SelectionDAG &DAG, unsigned Opcode,
                            SDValue GOTOffset) const;
  SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(BlockAddressSDNode *Node,
                            SelectionDAG &DAG) const;
  SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
                              unsigned Opcode) const;
  SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
                                 unsigned UnpackHigh) const;
  SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;

  // Helpers for PerformDAGCombine.
  SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
                         unsigned Index, DAGCombinerInfo &DCI,
                         bool Force) const;
  SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
                                 DAGCombinerInfo &DCI) const;

  // If the last instruction before MBBI in MBB was some form of COMPARE,
  // try to replace it with a COMPARE AND BRANCH just before MBBI.
  // CCMask and Target are the BRC-like operands for the branch.
  // Return true if the change was made.
  bool convertPrevCompareToBranch(MachineBasicBlock *MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  unsigned CCMask,
                                  MachineBasicBlock *Target) const;

  // Implement EmitInstrWithCustomInserter for individual operation types.
  MachineBasicBlock *emitSelect(MachineInstr *MI,
                                MachineBasicBlock *BB) const;
  MachineBasicBlock *emitCondStore(MachineInstr *MI,
                                   MachineBasicBlock *BB,
                                   unsigned StoreOpcode, unsigned STOCOpcode,
                                   bool Invert) const;
  MachineBasicBlock *emitExt128(MachineInstr *MI,
                                MachineBasicBlock *MBB,
                                bool ClearEven, unsigned SubReg) const;
  MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                                          unsigned BinOpcode, unsigned BitSize,
                                          bool Invert = false) const;
  MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
                                          MachineBasicBlock *MBB,
                                          unsigned CompareOpcode,
                                          unsigned KeepOldMask,
                                          unsigned BitSize) const;
  MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
                                        MachineBasicBlock *BB) const;
  MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
                                       MachineBasicBlock *BB,
                                       unsigned Opcode) const;
  MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
                                       MachineBasicBlock *BB,
                                       unsigned Opcode) const;
  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode,
                                          bool NoFloat) const;
  MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI,
                                         MachineBasicBlock *MBB,
                                         unsigned Opcode) const;

};
552} // end namespace llvm
553
554#endif
555