1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDGPUTargetTransformInfo.h"
13#include "SIDefines.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/ADT/Triple.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/CodeGen/MachineMemOperand.h"
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/Constants.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/GlobalValue.h"
22#include "llvm/IR/Instruction.h"
23#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include "llvm/IR/IntrinsicsR600.h"
25#include "llvm/IR/LLVMContext.h"
26#include "llvm/IR/Module.h"
27#include "llvm/MC/MCContext.h"
28#include "llvm/MC/MCInstrDesc.h"
29#include "llvm/MC/MCInstrInfo.h"
30#include "llvm/MC/MCRegisterInfo.h"
31#include "llvm/MC/MCSectionELF.h"
32#include "llvm/MC/MCSubtargetInfo.h"
33#include "llvm/MC/SubtargetFeature.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/MathExtras.h"
37#include <algorithm>
38#include <cassert>
39#include <cstdint>
40#include <cstring>
41#include <utility>
42
43#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44
45#define GET_INSTRINFO_NAMED_OPS
46#define GET_INSTRMAP_INFO
47#include "AMDGPUGenInstrInfo.inc"
48#undef GET_INSTRMAP_INFO
49#undef GET_INSTRINFO_NAMED_OPS
50
namespace {

/// \returns Bit mask covering \p Width consecutive bits starting at bit
/// \p Shift.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Clear the destination field, then insert the source value truncated to
  // Width bits.  (The previous code additionally cleared bit Shift with a
  // separate ~(1 << Shift) term, which is redundant: for any Width >= 1 that
  // bit is already covered by the field mask.)
  Dst &= ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.  The field is two bits wider from gfx10 on.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return (VersionMajor >= 10) ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits, used from gfx9 on).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits, used from gfx9 on).
unsigned getVmcntBitWidthHi() { return 2; }

} // end namespace anonymous
101
102namespace llvm {
103
104namespace AMDGPU {
105
106#define GET_MIMGBaseOpcodesTable_IMPL
107#define GET_MIMGDimInfoTable_IMPL
108#define GET_MIMGInfoTable_IMPL
109#define GET_MIMGLZMappingTable_IMPL
110#define GET_MIMGMIPMappingTable_IMPL
111#define GET_MIMGG16MappingTable_IMPL
112#include "AMDGPUGenSearchableTables.inc"
113
114int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
115                  unsigned VDataDwords, unsigned VAddrDwords) {
116  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
117                                             VDataDwords, VAddrDwords);
118  return Info ? Info->Opcode : -1;
119}
120
121const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
122  const MIMGInfo *Info = getMIMGInfo(Opc);
123  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
124}
125
126int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
127  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
128  const MIMGInfo *NewInfo =
129      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
130                          NewChannels, OrigInfo->VAddrDwords);
131  return NewInfo ? NewInfo->Opcode : -1;
132}
133
struct MUBUFInfo {
  // NOTE(review): this layout appears to mirror the generated MUBUF
  // searchable table included below — confirm before changing field order
  // or types.
  uint16_t Opcode;      // Concrete instruction opcode.
  uint16_t BaseOpcode;  // Base opcode shared by the element-count variants.
  uint8_t elements;     // Element count used to select a variant.
  bool has_vaddr;       // Instruction has a vaddr operand.
  bool has_srsrc;       // Instruction has an srsrc operand.
  bool has_soffset;     // Instruction has an soffset operand.
};
142
struct MTBUFInfo {
  // NOTE(review): this layout appears to mirror the generated MTBUF
  // searchable table included below — confirm before changing field order
  // or types.
  uint16_t Opcode;      // Concrete instruction opcode.
  uint16_t BaseOpcode;  // Base opcode shared by the element-count variants.
  uint8_t elements;     // Element count used to select a variant.
  bool has_vaddr;       // Instruction has a vaddr operand.
  bool has_srsrc;       // Instruction has an srsrc operand.
  bool has_soffset;     // Instruction has an soffset operand.
};
151
struct SMInfo {
  // NOTE(review): layout appears to mirror the generated SMEM table included
  // below — confirm before changing field order or types.
  uint16_t Opcode;  // Concrete instruction opcode.
  bool IsBuffer;    // Presumably marks the buffer form of the SMEM op —
                    // verify against the tablegen definition.
};
156
157#define GET_MTBUFInfoTable_DECL
158#define GET_MTBUFInfoTable_IMPL
159#define GET_MUBUFInfoTable_DECL
160#define GET_MUBUFInfoTable_IMPL
161#define GET_SMInfoTable_DECL
162#define GET_SMInfoTable_IMPL
163#include "AMDGPUGenSearchableTables.inc"
164
165int getMTBUFBaseOpcode(unsigned Opc) {
166  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
167  return Info ? Info->BaseOpcode : -1;
168}
169
170int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
171  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
172  return Info ? Info->Opcode : -1;
173}
174
175int getMTBUFElements(unsigned Opc) {
176  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
177  return Info ? Info->elements : 0;
178}
179
180bool getMTBUFHasVAddr(unsigned Opc) {
181  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
182  return Info ? Info->has_vaddr : false;
183}
184
185bool getMTBUFHasSrsrc(unsigned Opc) {
186  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
187  return Info ? Info->has_srsrc : false;
188}
189
190bool getMTBUFHasSoffset(unsigned Opc) {
191  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
192  return Info ? Info->has_soffset : false;
193}
194
195int getMUBUFBaseOpcode(unsigned Opc) {
196  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
197  return Info ? Info->BaseOpcode : -1;
198}
199
200int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
201  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
202  return Info ? Info->Opcode : -1;
203}
204
205int getMUBUFElements(unsigned Opc) {
206  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
207  return Info ? Info->elements : 0;
208}
209
210bool getMUBUFHasVAddr(unsigned Opc) {
211  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
212  return Info ? Info->has_vaddr : false;
213}
214
215bool getMUBUFHasSrsrc(unsigned Opc) {
216  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
217  return Info ? Info->has_srsrc : false;
218}
219
220bool getMUBUFHasSoffset(unsigned Opc) {
221  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
222  return Info ? Info->has_soffset : false;
223}
224
225bool getSMEMIsBuffer(unsigned Opc) {
226  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
227  return Info ? Info->IsBuffer : false;
228}
229
230// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
231// header files, so we need to wrap it in a function that takes unsigned
232// instead.
233int getMCOpcode(uint16_t Opcode, unsigned Gen) {
234  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
235}
236
namespace IsaInfo {

/// Print the ISA version string for \p STI to \p Stream, in the form
/// "<arch>-<vendor>-<os>-<env>-gfx<major><minor><stepping>[+xnack][+sram-ecc]".
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto Version = getIsaVersion(STI->getCPU());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << Version.Major
         << Version.Minor
         << Version.Stepping;

  if (hasXNACK(*STI))
    Stream << "+xnack";
  if (hasSRAMECC(*STI))
    Stream << "+sram-ecc";

  Stream.flush();
}

/// \returns True when targeting AMDHSA with the code-object-v3 feature set.
bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
             STI->getFeatureBits().test(FeatureCodeObjectV3);
}

/// \returns Wavefront size in lanes, selected by feature bit (default 64).
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

/// \returns Local (LDS) memory size in bytes, or 0 when no size feature is
/// set.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

/// \returns Maximum number of workgroups per CU for \p FlatWorkGroupSize:
/// fixed 8 on non-amdgcn, 40 for single-wave groups, otherwise 40 / waves
/// capped at 16.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

/// \returns Minimum waves per execution unit (always 1).
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

/// \returns Maximum waves per execution unit for the subtarget.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (!isGFX10(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

/// \returns Waves per EU occupied by one workgroup of \p FlatWorkGroupSize
/// work items, spread across the EUs of a CU.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

/// \returns Minimum flat workgroup size (always 1).
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

/// \returns Number of waves needed to hold \p FlatWorkGroupSize work items.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

/// \returns SGPR allocation granularity.  On gfx10+ all addressable SGPRs
/// are allocated as one block.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

/// \returns SGPR encoding granularity (always 8).
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

/// \returns Total SGPRs physically present per EU.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

/// \returns Number of SGPRs a single wave may address.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  // Hardware bug workaround: a fixed count must be used on affected parts.
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

/// \returns Minimum SGPR count that limits occupancy to exactly
/// \p WavesPerEU, or 0 when no SGPR count restricts occupancy that far.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // On gfx10+ SGPRs never limit occupancy (whole file is allocated at once).
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  // One past the largest count that would still permit WavesPerEU + 1 waves.
  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

/// \returns Maximum SGPR count usable at occupancy \p WavesPerEU; with
/// \p Addressable the result is clamped to the wave-addressable range.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

/// \returns Count of SGPRs consumed implicitly by VCC, flat-scratch and the
/// XNACK mask.  Note the counts below overwrite rather than accumulate —
/// presumably the implicit registers overlap; confirm against the ISA docs.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  // gfx10+: only VCC still costs extra SGPRs.
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

/// Overload deriving XNACK use from the subtarget's feature bits.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

/// \returns Encoded SGPR-block count for \p NumSGPRs.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

/// \returns VGPR allocation granularity.  \p EnableWavefrontSize32, when
/// provided, overrides the subtarget's wave32 feature bit.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32) {
  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

/// \returns VGPR encoding granularity (wave32 doubles it).
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32) {

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

/// \returns Total VGPRs physically present per EU.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (!isGFX10(*STI))
    return 256;
  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}

/// \returns Number of VGPRs a single wave may address (always 256).
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  return 256;
}

/// \returns Minimum VGPR count that limits occupancy to exactly
/// \p WavesPerEU, or 0 when no VGPR count restricts occupancy that far.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;
  // One past the largest count that would still permit WavesPerEU + 1 waves.
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

/// \returns Maximum VGPR count usable at occupancy \p WavesPerEU.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

/// \returns Encoded VGPR-block count for \p NumVGPRs.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo
500
/// Fill \p Header with the default amd_kernel_code_t values for \p STI.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // Wavefront size is stored as log2: 6 => 64 lanes (replaced by 5 for
  // wave32 below).
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    // gfx10: select WGP vs CU mode and enable memory ordering.
    Header.compute_pgm_resource_registers |=
      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
      S_00B848_MEM_ORDERED(1);
  }
}
536
/// \returns A zero-initialized HSA kernel descriptor with the default
/// compute_pgm_rsrc and kernel-code-property bits for \p STI.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    // gfx10+: record wave size, WGP/CU mode and memory ordering.
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  return KD;
}
565
566bool isGroupSegment(const GlobalValue *GV) {
567  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
568}
569
570bool isGlobalSegment(const GlobalValue *GV) {
571  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
572}
573
574bool isReadOnlySegment(const GlobalValue *GV) {
575  unsigned AS = GV->getAddressSpace();
576  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
577         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
578}
579
580bool shouldEmitConstantsToTextSection(const Triple &TT) {
581  return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
582}
583
584int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
585  Attribute A = F.getFnAttribute(Name);
586  int Result = Default;
587
588  if (A.isStringAttribute()) {
589    StringRef Str = A.getValueAsString();
590    if (Str.getAsInteger(0, Result)) {
591      LLVMContext &Ctx = F.getContext();
592      Ctx.emitError("can't parse integer attribute " + Name);
593    }
594  }
595
596  return Result;
597}
598
/// Parse function attribute \p Name as "<int>[,<int>]".
///
/// \returns \p Default when the attribute is missing, not a string, or fails
/// to parse (an error is emitted on parse failure).  With
/// \p OnlyFirstRequired, an absent second value is tolerated and its default
/// is kept.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // Only tolerate a failed second parse when it is optional AND absent;
    // a present-but-malformed second value is still an error.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
623
624unsigned getVmcntBitMask(const IsaVersion &Version) {
625  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
626  if (Version.Major < 9)
627    return VmcntLo;
628
629  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
630  return VmcntLo | VmcntHi;
631}
632
633unsigned getExpcntBitMask(const IsaVersion &Version) {
634  return (1 << getExpcntBitWidth()) - 1;
635}
636
637unsigned getLgkmcntBitMask(const IsaVersion &Version) {
638  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
639}
640
641unsigned getWaitcntBitMask(const IsaVersion &Version) {
642  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
643  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
644  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
645                                getLgkmcntBitWidth(Version.Major));
646  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
647  if (Version.Major < 9)
648    return Waitcnt;
649
650  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
651  return Waitcnt | VmcntHi;
652}
653
654unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
655  unsigned VmcntLo =
656      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
657  if (Version.Major < 9)
658    return VmcntLo;
659
660  unsigned VmcntHi =
661      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
662  VmcntHi <<= getVmcntBitWidthLo();
663  return VmcntLo | VmcntHi;
664}
665
666unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
667  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
668}
669
670unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
671  return unpackBits(Waitcnt, getLgkmcntBitShift(),
672                    getLgkmcntBitWidth(Version.Major));
673}
674
675void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
676                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
677  Vmcnt = decodeVmcnt(Version, Waitcnt);
678  Expcnt = decodeExpcnt(Version, Waitcnt);
679  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
680}
681
682Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
683  Waitcnt Decoded;
684  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
685  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
686  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
687  return Decoded;
688}
689
690unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
691                     unsigned Vmcnt) {
692  Waitcnt =
693      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
694  if (Version.Major < 9)
695    return Waitcnt;
696
697  Vmcnt >>= getVmcntBitWidthLo();
698  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
699}
700
701unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
702                      unsigned Expcnt) {
703  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
704}
705
706unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
707                       unsigned Lgkmcnt) {
708  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
709                                    getLgkmcntBitWidth(Version.Major));
710}
711
712unsigned encodeWaitcnt(const IsaVersion &Version,
713                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
714  unsigned Waitcnt = getWaitcntBitMask(Version);
715  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
716  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
717  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
718  return Waitcnt;
719}
720
721unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
722  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
723}
724
725//===----------------------------------------------------------------------===//
726// hwreg
727//===----------------------------------------------------------------------===//
728
namespace Hwreg {

/// Map a symbolic hardware-register name to its id, or ID_UNKNOWN_ when the
/// name is not in the symbolic table.
int64_t getHwregId(const StringRef Name) {
  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
    // Null entries mark gaps in the id space.
    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
      return Id;
  }
  return ID_UNKNOWN_;
}

/// \returns One past the last hwreg id that has a symbolic name on \p STI.
static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI) || isVI(STI))
    return ID_SYMBOLIC_FIRST_GFX9_;
  else if (isGFX9(STI))
    return ID_SYMBOLIC_FIRST_GFX10_;
  else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
    return ID_SYMBOLIC_FIRST_GFX1030_;
  else
    return ID_SYMBOLIC_LAST_;
}

/// \returns True when \p Id names a hwreg that exists on \p STI
/// (XNACK_MASK is additionally excluded on the GFX10_B encoding).
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
  return
    ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
    IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
}

/// \returns True when \p Id fits the encoded id field (target-independent).
bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

/// \returns True when \p Offset fits the encoded offset field.
bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

/// \returns True when \p Width is encodable; the field stores width - 1.
bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

/// Pack id, offset and width into the hwreg immediate encoding (width is
/// stored biased by one).
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

/// \returns The symbolic name for \p Id, or "" when invalid on \p STI.
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
}

/// Split the hwreg immediate \p Val into id, offset and width (undoing the
/// width - 1 bias).
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}

} // namespace Hwreg
785
786//===----------------------------------------------------------------------===//
787// SendMsg
788//===----------------------------------------------------------------------===//
789
namespace SendMsg {

/// Map a symbolic message name to its id, or ID_UNKNOWN_ when not found.
int64_t getMsgId(const StringRef Name) {
  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
    // Null entries mark gaps in the message id space.
    if (IdSymbolic[i] && Name == IdSymbolic[i])
      return i;
  }
  return ID_UNKNOWN_;
}

/// \returns True when \p MsgId has a symbolic name (target-independent).
static bool isValidMsgId(int64_t MsgId) {
  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
}

/// \returns True when \p MsgId is valid on \p STI.  In non-strict mode only
/// the encoded field width is checked.
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
  if (Strict) {
    // These two messages only exist on gfx9 and gfx10.
    if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
      return isGFX9(STI) || isGFX10(STI);
    else
      return isValidMsgId(MsgId);
  } else {
    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
  }
}

/// \returns The symbolic name for \p MsgId, or "" when it has none.
StringRef getMsgName(int64_t MsgId) {
  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
}

/// Map an operation name to its id for message \p MsgId (SYSMSG operations
/// use a different symbol table than the GS ones).
/// \returns OP_UNKNOWN_ when not found.
int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

/// \returns True when \p OpId is a valid operation for \p MsgId.  In
/// non-strict mode only the encoded field width is checked.
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  switch(MsgId)
  {
  case ID_GS:
    // GS_OP_NOP is only accepted with GS_DONE (see the next case).
    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
  case ID_GS_DONE:
    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
  case ID_SYSMSG:
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  default:
    // All other messages take no operation.
    return OpId == OP_NONE_;
  }
}

/// \returns The symbolic name of operation \p OpId of message \p MsgId.
/// \pre \p MsgId must carry an operation (see msgRequiresOp).
StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
  assert(msgRequiresOp(MsgId));
  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

/// \returns True when \p StreamId is valid for the message/operation pair.
/// In non-strict mode only the encoded field width is checked.
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  switch(MsgId)
  {
  case ID_GS:
    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
  case ID_GS_DONE:
    // GS_DONE(NOP) carries no stream id.
    return (OpId == OP_GS_NOP)?
           (StreamId == STREAM_ID_NONE_) :
           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
  default:
    return StreamId == STREAM_ID_NONE_;
  }
}

/// \returns True when message \p MsgId carries an operation field.
bool msgRequiresOp(int64_t MsgId) {
  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
}

/// \returns True when the message/operation pair carries a stream id.
bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
}

/// Split a sendmsg immediate into its id, operation and stream-id fields.
void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId) {
  MsgId = Val & ID_MASK_;
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
}

/// Pack id, operation and stream id into a sendmsg immediate.
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return (MsgId << ID_SHIFT_) |
         (OpId << OP_SHIFT_) |
         (StreamId << STREAM_ID_SHIFT_);
}

} // namespace SendMsg
898
899//===----------------------------------------------------------------------===//
900//
901//===----------------------------------------------------------------------===//
902
903unsigned getInitialPSInputAddr(const Function &F) {
904  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
905}
906
907bool isShader(CallingConv::ID cc) {
908  switch(cc) {
909    case CallingConv::AMDGPU_VS:
910    case CallingConv::AMDGPU_LS:
911    case CallingConv::AMDGPU_HS:
912    case CallingConv::AMDGPU_ES:
913    case CallingConv::AMDGPU_GS:
914    case CallingConv::AMDGPU_PS:
915    case CallingConv::AMDGPU_CS:
916      return true;
917    default:
918      return false;
919  }
920}
921
922bool isCompute(CallingConv::ID cc) {
923  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
924}
925
926bool isEntryFunctionCC(CallingConv::ID CC) {
927  switch (CC) {
928  case CallingConv::AMDGPU_KERNEL:
929  case CallingConv::SPIR_KERNEL:
930  case CallingConv::AMDGPU_VS:
931  case CallingConv::AMDGPU_GS:
932  case CallingConv::AMDGPU_PS:
933  case CallingConv::AMDGPU_CS:
934  case CallingConv::AMDGPU_ES:
935  case CallingConv::AMDGPU_HS:
936  case CallingConv::AMDGPU_LS:
937    return true;
938  default:
939    return false;
940  }
941}
942
/// \returns true if the XNACK feature bit is set on \p STI.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

/// \returns true if the SRAM-ECC feature bit is set on \p STI.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

/// \returns true if MIMG R128 addressing is usable: the feature must be
/// present AND the subtarget must not use R128-A16 (which repurposes the bit).
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
}

/// \returns true if the GFX10 A16 address feature bit is set on \p STI.
bool hasGFX10A16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}

/// \returns true if the G16 (16-bit gradients) feature bit is set on \p STI.
bool hasG16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureG16];
}

/// \returns true unless the subtarget uses unpacked D16 VMEM operations.
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

/// \returns true if \p STI is a Southern Islands (GFX6) subtarget.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

/// \returns true if \p STI is a Sea Islands (GFX7) subtarget.
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

/// \returns true if \p STI is a Volcanic Islands (GFX8) subtarget.
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

/// \returns true if \p STI is a GFX9 subtarget.
bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

/// \returns true if \p STI is a GFX10 subtarget.
bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

/// \returns true if \p STI uses the GCN3 (VI/GFX8) instruction encoding.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

/// \returns true if \p STI uses the GFX10 "B" encoding variant.
bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
}

/// \returns true if \p STI supports the GFX10.3 instruction additions.
bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
}
998
/// \returns true if MC register \p Reg is a scalar register (or SCC).
/// Wide register tuples are classified by their first 32-bit subregister
/// (sub0), so SGPR pairs/quads etc. are recognized even though the tuple
/// itself is not a member of SReg_32.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}
1005
1006bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
1007  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
1008    if (*R == Reg1) return true;
1009  }
1010  return false;
1011}
1012
// Shared switch body used by getMCReg and mc2PseudoReg below.  Each direction
// supplies its own CASE_CI_VI / CASE_VI_GFX9_GFX10 expansions before
// instantiating the macro; any register not listed maps to itself.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9_GFX10(TTMP0) \
  CASE_VI_GFX9_GFX10(TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2) \
  CASE_VI_GFX9_GFX10(TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4) \
  CASE_VI_GFX9_GFX10(TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6) \
  CASE_VI_GFX9_GFX10(TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8) \
  CASE_VI_GFX9_GFX10(TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10) \
  CASE_VI_GFX9_GFX10(TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12) \
  CASE_VI_GFX9_GFX10(TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14) \
  CASE_VI_GFX9_GFX10(TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

// Forward direction: pseudo register -> encoding-specific register for the
// current subtarget (CI vs VI, or VI vs GFX9/GFX10 variants).
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9_GFX10(node) \
  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;

// Map pseudo register \p Reg to the real MC register for subtarget \p STI.
// R600 targets have no such remapping and return the register unchanged.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10

// Reverse direction: any encoding-specific variant collapses back to the
// pseudo register.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;

// Map an encoding-specific MC register back to its pseudo register.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
1080
/// \returns true if operand \p OpNo of \p Desc is a source operand, i.e. its
/// operand type falls in [OPERAND_SRC_FIRST, OPERAND_SRC_LAST].
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}
1087
/// \returns true if operand \p OpNo of \p Desc has a floating-point source
/// operand type.  NOTE(review): the packed V2INT16 variants are included in
/// this list as well — confirm against the operand-folding users if relying
/// on a strict FP-only interpretation.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return true;
  default:
    return false;
  }
}
1111
/// \returns true if operand \p OpNo of \p Desc only accepts inline constants,
/// i.e. its type lies in [OPERAND_REG_INLINE_C_FIRST, OPERAND_REG_INLINE_C_LAST].
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
1118
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
//
// \p RCID must be one of the AMDGPU register class IDs listed below; any
// other value is a programming error (llvm_unreachable).
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
    return 192;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
    return 256;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
1185
/// \returns the width in bits of register class \p RC (convenience overload
/// forwarding to the ID-based version above).
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
1189
/// \returns the size in BYTES of the register operand \p OpNo of \p Desc,
/// derived from its register class's bit width.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
1196
1197bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
1198  if (isInlinableIntLiteral(Literal))
1199    return true;
1200
1201  uint64_t Val = static_cast<uint64_t>(Literal);
1202  return (Val == DoubleToBits(0.0)) ||
1203         (Val == DoubleToBits(1.0)) ||
1204         (Val == DoubleToBits(-1.0)) ||
1205         (Val == DoubleToBits(0.5)) ||
1206         (Val == DoubleToBits(-0.5)) ||
1207         (Val == DoubleToBits(2.0)) ||
1208         (Val == DoubleToBits(-2.0)) ||
1209         (Val == DoubleToBits(4.0)) ||
1210         (Val == DoubleToBits(-4.0)) ||
1211         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
1212}
1213
1214bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
1215  if (isInlinableIntLiteral(Literal))
1216    return true;
1217
1218  // The actual type of the operand does not seem to matter as long
1219  // as the bits match one of the inline immediate values.  For example:
1220  //
1221  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1222  // so it is a legal inline immediate.
1223  //
1224  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1225  // floating-point, so it is a legal inline immediate.
1226
1227  uint32_t Val = static_cast<uint32_t>(Literal);
1228  return (Val == FloatToBits(0.0f)) ||
1229         (Val == FloatToBits(1.0f)) ||
1230         (Val == FloatToBits(-1.0f)) ||
1231         (Val == FloatToBits(0.5f)) ||
1232         (Val == FloatToBits(-0.5f)) ||
1233         (Val == FloatToBits(2.0f)) ||
1234         (Val == FloatToBits(-2.0f)) ||
1235         (Val == FloatToBits(4.0f)) ||
1236         (Val == FloatToBits(-4.0f)) ||
1237         (Val == 0x3e22f983 && HasInv2Pi);
1238}
1239
1240bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
1241  if (!HasInv2Pi)
1242    return false;
1243
1244  if (isInlinableIntLiteral(Literal))
1245    return true;
1246
1247  uint16_t Val = static_cast<uint16_t>(Literal);
1248  return Val == 0x3C00 || // 1.0
1249         Val == 0xBC00 || // -1.0
1250         Val == 0x3800 || // 0.5
1251         Val == 0xB800 || // -0.5
1252         Val == 0x4000 || // 2.0
1253         Val == 0xC000 || // -2.0
1254         Val == 0x4400 || // 4.0
1255         Val == 0xC400 || // -4.0
1256         Val == 0x3118;   // 1/2pi
1257}
1258
/// \returns true if \p Literal is inlinable as a packed 16-bit (v2x16)
/// operand.
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  // Fits in 16 bits (signed or unsigned): check the truncated value.
  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  // Only the high half is non-zero: check it as a 16-bit literal.
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  // Otherwise both halves must be identical and themselves inlinable.
  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
1273
/// \returns true if \p Literal is inlinable as a packed 16-bit INTEGER
/// operand (same half-by-half structure as isInlinableLiteralV216, but
/// restricted to integer inline constants).
bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  // Fits in 16 bits (signed or unsigned): check the truncated value.
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  // Only the high half is non-zero: check it alone.
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  // Otherwise both halves must be identical and inlinable.
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}
1284
1285bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1286  assert(HasInv2Pi);
1287
1288  int16_t Lo16 = static_cast<int16_t>(Literal);
1289  if (isInt<16>(Literal) || isUInt<16>(Literal))
1290    return true;
1291
1292  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1293  if (!(Literal & 0xffff))
1294    return true;
1295  return Lo16 == Hi16;
1296}
1297
/// \returns true if argument \p A is known to be passed in an SGPR (i.e. it
/// is uniform and never a source of divergence).
bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    // All kernel arguments are in SGPRs.
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}
1323
// True if the subtarget encodes SMEM/SMRD immediate offsets in bytes
// (GCN3/GFX8+ encoding or GFX10); SI/CI encode them in dwords (see
// convertSMRDOffsetUnits).
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10(ST);
}

// True if the subtarget's SMRD immediate offset field is signed (GFX9/GFX10).
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9(ST) || isGFX10(ST);
}
1331
1332bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1333                                      int64_t EncodedOffset) {
1334  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
1335                               : isUInt<8>(EncodedOffset);
1336}
1337
1338bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1339                                    int64_t EncodedOffset,
1340                                    bool IsBuffer) {
1341  return !IsBuffer &&
1342         hasSMRDSignedImmOffset(ST) &&
1343         isInt<21>(EncodedOffset);
1344}
1345
// \returns true if \p ByteOffset is a multiple of 4 (dword-aligned).
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
1349
/// Convert \p ByteOffset into the units used by the SMRD immediate offset
/// field on \p ST: bytes on byte-offset subtargets, dwords (ByteOffset / 4)
/// on SI/CI.  The dword path asserts alignment.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}
1358
/// Try to encode \p ByteOffset for an SMRD/SMEM instruction on \p ST.
/// \returns the value for the immediate offset field, or None when the offset
/// is not representable (out of range, or misaligned for dword encodings).
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
  }

  // Dword-unit encodings cannot represent unaligned byte offsets.
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? Optional<int64_t>(EncodedOffset)
             : None;
}
1375
/// Try to encode \p ByteOffset as a 32-bit literal offset.  This form is only
/// available on CI, where the offset is encoded in dwords (so it must be
/// dword-aligned).  \returns None when unavailable or unrepresentable.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
1384
// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
//
// \p Alignment is the alignment the ImmOffset component must preserve;
// on success the split is written to \p SOffset and \p ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, Align Alignment) {
  // Largest ImmOffset (<= 4095) that keeps the requested alignment.
  const uint32_t MaxImm = alignDown(4095, Alignment.value());
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Alignment.value()) & ~4095;
      uint32_t Low = (Imm + Alignment.value()) & 4095;
      Imm = Low;
      Overflow = High - Alignment.value();
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
1430
// Initialize mode-register defaults for \p F: start from the calling
// convention's defaults, then let explicit function attributes override
// individual fields.
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  // "denormal-fp-math-f32" controls only the FP32 denormal mode.
  StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }

  // "denormal-fp-math" covers all types, but the more specific f32 attribute
  // above takes precedence for FP32.
  StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

    if (DenormF32Attr.empty()) {
      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
    }

    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }
}
1463
namespace {

// Key type for the tablegen'd SourcesOfDivergence table: an intrinsic ID.
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

// Pull in the tablegen-generated lookup-table implementations used by the
// functions below.
#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
1477
1478bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
1479  return lookupSourceOfDivergence(IntrID);
1480}
1481
1482const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
1483                                                  uint8_t NumComponents,
1484                                                  uint8_t NumFormat,
1485                                                  const MCSubtargetInfo &STI) {
1486  return isGFX10(STI)
1487             ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
1488                                            NumFormat)
1489             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
1490}
1491
1492const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
1493                                                  const MCSubtargetInfo &STI) {
1494  return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
1495                      : getGfx9BufferFormatInfo(Format);
1496}
1497
1498} // namespace AMDGPU
1499} // namespace llvm
1500