//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
struct ArgDescriptor;

class AMDGPUTargetLowering : public TargetLowering {
private:
  const AMDGPUSubtarget *Subtarget;

  /// \returns The AMDGPUISD::FFBH_U32 or AMDGPUISD::FFBL_B32 node (selected by
  /// \p Opc) if the incoming \p Op may have been legalized from a smaller type
  /// VT. We need to match the pre-legalized type because the generic
  /// legalization inserts the add/sub between the select and compare.
  SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL,
                      unsigned Opc) const;

public:
  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// an unsigned integer. Truncating to this size and then zero-extending to
  /// the original size will not change the value.
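  /// For example, an i32 value known to lie in [0, 255] needs only 8 bits:
  /// truncating it to i8 and zero-extending back to i32 leaves it unchanged.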
  static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);

  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// a signed integer. Truncating to this size and then sign-extending to the
  /// original size will not change the value.
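  /// For example, an i32 value known to lie in [-128, 127] needs only 8 bits:
  /// truncating it to i8 and sign-extending back to i32 leaves it unchanged.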
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
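
  // Illustrative sketch (not necessarily the exact in-tree combine): these
  // helpers let a combine prove that a 32-bit multiply fits the 24-bit
  // multiply path, e.g.
  //
  //   if (numBitsUnsigned(LHS, DAG) <= 24 && numBitsUnsigned(RHS, DAG) <= 24)
  //     return DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, LHS, RHS);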

protected:
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;

  static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags);
  static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
                                     SDNodeFlags Flags);
  SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
                                SDNodeFlags Flags) const;
  SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
  std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
                                                const SDLoc SL, SDValue Op,
                                                SDNodeFlags Flags) const;

  SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          bool IsLog10, SDNodeFlags Flags) const;
  SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          SDNodeFlags Flags) const;
  SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                            SDNodeFlags Flags) const;
  SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

protected:
  bool shouldCombineMemoryType(EVT VT) const;
  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
                                       unsigned Opc, SDValue LHS,
                                       uint32_t ValLo, uint32_t ValHi) const;
  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                                  SDValue RHS, DAGCombinerInfo &DCI) const;

  SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
                               SDValue N) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  TargetLowering::NegatibleCost
  getConstantNegateCost(const ConstantFPSDNode *C) const;

  bool isConstantCostlierToNegate(SDValue N) const;
  bool isConstantCheaperToNegate(SDValue N) const;
  SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);

  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                     SelectionDAG &DAG) const;

  /// Return the 64-bit value \p Op as two 32-bit integers.
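  /// Conceptually the value is bitcast to v2i32, with element 0 returned as
  /// the low half (first) and element 1 as the high half (second).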
  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector type into two parts. The first part is a power-of-two
  /// vector. The second part is whatever is left over, and is a scalar if it
  /// would otherwise be a 1-vector.
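  /// For example, v3i32 splits into (v2i32, i32) and v7i16 splits into
  /// (v4i16, v3i16).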
  std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;

  /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
  /// scalar.
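  /// For example, a v4i32 value may be split into (v2i32, v2i32), or a v3f32
  /// value into (v2f32, f32).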
  std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
                                          const EVT &LoVT, const EVT &HiVT,
                                          SelectionDAG &DAG) const;

  /// Split a vector load into 2 loads of half the vector.
  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Widen a suitably aligned v3 load. For all other cases, split the input
  /// vector load.
  SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector store into 2 stores of half the vector.
  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &Results) const;

  void analyzeFormalArgumentsCompute(
    CCState &State,
    const SmallVectorImpl<ISD::InputArg> &Ins) const;

public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);

  /// \returns true if it is safe to ignore the sign of a zero produced by
  /// \p Op (for example, when the no-signed-zeros flag is present).
  bool mayIgnoreSignedZero(SDValue Op) const;

  static inline SDValue stripBitcast(SDValue Val) {
    return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
  }

  static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc);
  static bool allUsesHaveSourceMods(const SDNode *N,
                                    unsigned CostThreshold = 4);
  bool isFAbsFree(EVT VT) const override;
  bool isFNegFree(EVT VT) const override;
  bool isTruncateFree(EVT Src, EVT Dest) const override;
  bool isTruncateFree(Type *Src, Type *Dest) const override;

  bool isZExtFree(Type *Src, Type *Dest) const override;
  bool isZExtFree(EVT Src, EVT Dest) const override;

  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  MVT getVectorIdxTy(const DataLayout &) const override;
  bool isSelectSupported(SelectSupportKind) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override;
  bool shouldReduceLoadWidth(SDNode *Load,
                             ISD::LoadExtType ExtType,
                             EVT ExtVT) const override;

  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const final;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
                                    unsigned NumElem,
                                    unsigned AS) const override;
  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isSDNodeAlwaysUniform(const SDNode *N) const override;
  static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
  static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue addTokenForArgument(SDValue Chain,
                              SelectionDAG &DAG,
                              MachineFrameInfo &MFI,
                              int ClobberedFI) const;

  SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals,
                             StringRef Reason) const;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  void ReplaceNodeResults(SDNode *N,
                          SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   SDValue CC, DAGCombinerInfo &DCI) const;

  SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                               SDValue RHS, SDValue True, SDValue False,
                               SDValue CC, DAGCombinerInfo &DCI) const;

  const char *getTargetNodeName(unsigned Opcode) const override;

  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
  // AMDGPU. Commit r319036
  // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
  // turned on MergeConsecutiveStores() before Instruction Selection for all
  // targets. Enough AMDGPU compiles go into an infinite loop
  // (MergeConsecutiveStores() merges two stores, LegalizeStoreOps() un-merges
  // them, MergeConsecutiveStores() re-merges them, and so on) to warrant
  // turning it off for now.
  bool mergeStoresAfterLegalization(EVT) const override { return false; }

  bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
    return true;
  }
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  virtual SDNode *PostISelFolding(MachineSDNode *N,
                                  SelectionDAG &DAG) const = 0;

  /// Determine which bits of \p Op are known to be either zero or one and
  /// return them in the \p Known bit sets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth = 0) const override;

  unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
                                            Register R,
                                            const APInt &DemandedElts,
                                            const MachineRegisterInfo &MRI,
                                            unsigned Depth = 0) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op,
                                    const SelectionDAG &DAG,
                                    bool SNaN = false,
                                    unsigned Depth = 0) const override;

  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
                           Register N1) const override;

  /// Helper function that adds \p Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing \p Reg if \p RawReg is true,
  /// otherwise a copy from the register.
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT,
                               const SDLoc &SL,
                               bool RawReg = false) const;
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
  }

  // Returns the raw live-in register rather than a copy from it.
  SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
                                  const TargetRegisterClass *RC,
                                  Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()),
                                true);
  }

  /// Similar to CreateLiveInRegister, except the value may be loaded from a
  /// stack slot rather than passed in a register.
  SDValue loadStackInputValue(SelectionDAG &DAG,
                              EVT VT,
                              const SDLoc &SL,
                              int64_t Offset) const;

  SDValue storeStackInputValue(SelectionDAG &DAG,
                               const SDLoc &SL,
                               SDValue Chain,
                               SDValue ArgVal,
                               int64_t Offset) const;

  SDValue loadInputValue(SelectionDAG &DAG,
                         const TargetRegisterClass *RC,
                         EVT VT, const SDLoc &SL,
                         const ArgDescriptor &Arg) const;

  enum ImplicitParameter {
    FIRST_IMPLICIT,
    PRIVATE_BASE,
    SHARED_BASE,
    QUEUE_PTR,
  };

  /// Helper function that returns the byte offset of the given
  /// type of implicit parameter.
  uint32_t getImplicitParameterOffset(const MachineFunction &MF,
                                      const ImplicitParameter Param) const;
  uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize,
                                      const ImplicitParameter Param) const;

  MVT getFenceOperandTy(const DataLayout &DL) const override {
    return MVT::i32;
  }

  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
};

namespace AMDGPUISD {

enum NodeType : unsigned {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  UMUL, // 32-bit unsigned multiplication.
  BRANCH_COND,
  // End AMDIL ISD Opcodes

  // Function call.
  CALL,
  TC_RETURN,
  TC_RETURN_GFX,
  TC_RETURN_CHAIN,
  TRAP,

  // Masked control flow nodes.
  IF,
  ELSE,
  LOOP,

  // A uniform kernel return that terminates the wavefront.
  ENDPGM,

  // s_endpgm, but we may want to insert it in the middle of the block.
  ENDPGM_TRAP,

  // Return to a shader part's epilog code.
  RETURN_TO_EPILOG,

  // Return with values from a non-entry function.
  RET_GLUE,

  // Convert an unswizzled wave-uniform stack address to an address compatible
  // with a vector offset for use in stack access.
  WAVE_ADDRESS,

  DWORDADDR,
  FRACT,

  /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
  /// modifier behavior with dx10_enable.
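  /// For example, clamp(-0.5) == 0.0, clamp(1.5) == 1.0, and clamp(NaN) == 0.0.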
  CLAMP,

  // This is SETCC with the full mask result which is used for a compare with a
  // result bit per item in the wavefront.
  SETCC,
  SETREG,

  DENORM_MODE,

  // FP ops with input and output chain.
  FMA_W_CHAIN,
  FMUL_W_CHAIN,

  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
  // Denormals handled on some parts.
  COS_HW,
  SIN_HW,
  FMAX_LEGACY,
  FMIN_LEGACY,

  FMAX3,
  SMAX3,
  UMAX3,
  FMIN3,
  SMIN3,
  UMIN3,
  FMED3,
  SMED3,
  UMED3,
  FMAXIMUM3,
  FMINIMUM3,
  FDOT2,
  URECIP,
  DIV_SCALE,
  DIV_FMAS,
  DIV_FIXUP,
  // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
  // treated as an illegal operation.
  FMAD_FTZ,

  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
  //            For f64, max error 2^29 ULP, handles denormals.
  RCP,
  RSQ,
  RCP_LEGACY,
  RCP_IFLAG,

  // log2, no denormal handling for f32.
  LOG,

  // exp2, no denormal handling for f32.
  EXP,

  FMUL_LEGACY,
  RSQ_CLAMP,
  FP_CLASS,
  DOT4,
  CARRY,
  BORROW,
  BFE_U32,  // Extract range of bits with zero extension to 32-bits.
  BFE_I32,  // Extract range of bits with sign extension to 32-bits.
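  //   e.g. (assuming operands (src, offset, width)) BFE_U32(x, 8, 8) ==
  //   (x >> 8) & 0xff; BFE_I32 sign-extends the same extracted field.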
  BFI,      // (src0 & src1) | (~src0 & src2)
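  //   e.g. BFI(0x0000ffff, a, b) == (a & 0x0000ffff) | (b & 0xffff0000).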
  BFM,      // Insert a range of bits into a 32-bit word.
  FFBH_U32, // ctlz with -1 if input is zero.
  FFBH_I32,
  FFBL_B32, // cttz with -1 if input is zero.
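  //   e.g. FFBH_U32(0x00800000) == 8, FFBL_B32(0x00000008) == 3, and both
  //   return -1 for an input of 0.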
  MUL_U24,
  MUL_I24,
  MULHI_U24,
  MULHI_I24,
  MAD_U24,
  MAD_I24,
  MAD_U64_U32,
  MAD_I64_I32,
  PERM,
  TEXTURE_FETCH,
  R600_EXPORT,
  CONST_ADDRESS,
  REGISTER_LOAD,
  REGISTER_STORE,
  SAMPLE,
  SAMPLEB,
  SAMPLED,
  SAMPLEL,

  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,
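  //   e.g. CVT_F32_UBYTE1(0x0000ff00) == 255.0f (byte 1, converted to f32).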

  // Convert two f32 numbers into a single register holding two packed f16
  // with round to zero.
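  //   e.g. (1.0f, 2.0f) packs to 0x40003c00, assuming the first operand lands
  //   in the low 16 bits (f16 1.0 == 0x3c00, f16 2.0 == 0x4000).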
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.
  FP_TO_FP16,

  /// This node is for VLIW targets and it is used to represent a vector
  /// that is stored in consecutive registers with the same channel.
  /// For example:
  ///   |X  |Y|Z|W|
  /// T0|v.x| | | |
  /// T1|v.y| | | |
  /// T2|v.z| | | |
  /// T3|v.w| | | |
  BUILD_VERTICAL_VECTOR,
  /// Pointer to the start of the shader's constant data.
  CONST_DATA_PTR,
  PC_ADD_REL_OFFSET,
  LDS,
  FPTRUNC_ROUND_UPWARD,
  FPTRUNC_ROUND_DOWNWARD,

  DUMMY_CHAIN,
  FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LOAD_D16_HI,
  LOAD_D16_LO,
  LOAD_D16_HI_I8,
  LOAD_D16_HI_U8,
  LOAD_D16_LO_I8,
  LOAD_D16_LO_U8,

  STORE_MSKOR,
  LOAD_CONSTANT,
  TBUFFER_STORE_FORMAT,
  TBUFFER_STORE_FORMAT_D16,
  TBUFFER_LOAD_FORMAT,
  TBUFFER_LOAD_FORMAT_D16,
  DS_ORDERED_COUNT,
  ATOMIC_CMP_SWAP,
  ATOMIC_LOAD_FMIN,
  ATOMIC_LOAD_FMAX,
  BUFFER_LOAD,
  BUFFER_LOAD_UBYTE,
  BUFFER_LOAD_USHORT,
  BUFFER_LOAD_BYTE,
  BUFFER_LOAD_SHORT,
  BUFFER_LOAD_FORMAT,
  BUFFER_LOAD_FORMAT_TFE,
  BUFFER_LOAD_FORMAT_D16,
  SBUFFER_LOAD,
  SBUFFER_LOAD_BYTE,
  SBUFFER_LOAD_UBYTE,
  SBUFFER_LOAD_SHORT,
  SBUFFER_LOAD_USHORT,
  BUFFER_STORE,
  BUFFER_STORE_BYTE,
  BUFFER_STORE_SHORT,
  BUFFER_STORE_FORMAT,
  BUFFER_STORE_FORMAT_D16,
  BUFFER_ATOMIC_SWAP,
  BUFFER_ATOMIC_ADD,
  BUFFER_ATOMIC_SUB,
  BUFFER_ATOMIC_SMIN,
  BUFFER_ATOMIC_UMIN,
  BUFFER_ATOMIC_SMAX,
  BUFFER_ATOMIC_UMAX,
  BUFFER_ATOMIC_AND,
  BUFFER_ATOMIC_OR,
  BUFFER_ATOMIC_XOR,
  BUFFER_ATOMIC_INC,
  BUFFER_ATOMIC_DEC,
  BUFFER_ATOMIC_CMPSWAP,
  BUFFER_ATOMIC_CSUB,
  BUFFER_ATOMIC_FADD,
  BUFFER_ATOMIC_FADD_BF16,
  BUFFER_ATOMIC_FMIN,
  BUFFER_ATOMIC_FMAX,
  BUFFER_ATOMIC_COND_SUB_U32,

  LAST_AMDGPU_ISD_NUMBER
};

} // End namespace AMDGPUISD

} // End namespace llvm

#endif