1//===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file implements a target parser to recognise AMDGPU hardware names
// (R600 and AMDGCN GPU/arch names) and to report the feature flags, ISA
// versions and default wavefront-size features associated with them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/TargetParser/TargetParser.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/TargetParser/Triple.h"
17
18using namespace llvm;
19using namespace AMDGPU;
20
21namespace {
22
23struct GPUInfo {
24  StringLiteral Name;
25  StringLiteral CanonicalName;
26  AMDGPU::GPUKind Kind;
27  unsigned Features;
28};
29
30constexpr GPUInfo R600GPUs[] = {
31  // Name       Canonical    Kind        Features
32  //            Name
33  {{"r600"},    {"r600"},    GK_R600,    FEATURE_NONE },
34  {{"rv630"},   {"r600"},    GK_R600,    FEATURE_NONE },
35  {{"rv635"},   {"r600"},    GK_R600,    FEATURE_NONE },
36  {{"r630"},    {"r630"},    GK_R630,    FEATURE_NONE },
37  {{"rs780"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
38  {{"rs880"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
39  {{"rv610"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
40  {{"rv620"},   {"rs880"},   GK_RS880,   FEATURE_NONE },
41  {{"rv670"},   {"rv670"},   GK_RV670,   FEATURE_NONE },
42  {{"rv710"},   {"rv710"},   GK_RV710,   FEATURE_NONE },
43  {{"rv730"},   {"rv730"},   GK_RV730,   FEATURE_NONE },
44  {{"rv740"},   {"rv770"},   GK_RV770,   FEATURE_NONE },
45  {{"rv770"},   {"rv770"},   GK_RV770,   FEATURE_NONE },
46  {{"cedar"},   {"cedar"},   GK_CEDAR,   FEATURE_NONE },
47  {{"palm"},    {"cedar"},   GK_CEDAR,   FEATURE_NONE },
48  {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA  },
49  {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA  },
50  {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE },
51  {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE },
52  {{"sumo"},    {"sumo"},    GK_SUMO,    FEATURE_NONE },
53  {{"sumo2"},   {"sumo"},    GK_SUMO,    FEATURE_NONE },
54  {{"barts"},   {"barts"},   GK_BARTS,   FEATURE_NONE },
55  {{"caicos"},  {"caicos"},  GK_CAICOS,  FEATURE_NONE },
56  {{"aruba"},   {"cayman"},  GK_CAYMAN,  FEATURE_FMA  },
57  {{"cayman"},  {"cayman"},  GK_CAYMAN,  FEATURE_FMA  },
58  {{"turks"},   {"turks"},   GK_TURKS,   FEATURE_NONE }
59};
60
61// This table should be sorted by the value of GPUKind
62// Don't bother listing the implicitly true features
63constexpr GPUInfo AMDGCNGPUs[] = {
64    // clang-format off
65    // Name         Canonical    Kind        Features
66    //              Name
67    {{"gfx600"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
68    {{"tahiti"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
69    {{"gfx601"},    {"gfx601"},  GK_GFX601,  FEATURE_NONE},
70    {{"pitcairn"},  {"gfx601"},  GK_GFX601,  FEATURE_NONE},
71    {{"verde"},     {"gfx601"},  GK_GFX601,  FEATURE_NONE},
72    {{"gfx602"},    {"gfx602"},  GK_GFX602,  FEATURE_NONE},
73    {{"hainan"},    {"gfx602"},  GK_GFX602,  FEATURE_NONE},
74    {{"oland"},     {"gfx602"},  GK_GFX602,  FEATURE_NONE},
75    {{"gfx700"},    {"gfx700"},  GK_GFX700,  FEATURE_NONE},
76    {{"kaveri"},    {"gfx700"},  GK_GFX700,  FEATURE_NONE},
77    {{"gfx701"},    {"gfx701"},  GK_GFX701,  FEATURE_FAST_FMA_F32},
78    {{"hawaii"},    {"gfx701"},  GK_GFX701,  FEATURE_FAST_FMA_F32},
79    {{"gfx702"},    {"gfx702"},  GK_GFX702,  FEATURE_FAST_FMA_F32},
80    {{"gfx703"},    {"gfx703"},  GK_GFX703,  FEATURE_NONE},
81    {{"kabini"},    {"gfx703"},  GK_GFX703,  FEATURE_NONE},
82    {{"mullins"},   {"gfx703"},  GK_GFX703,  FEATURE_NONE},
83    {{"gfx704"},    {"gfx704"},  GK_GFX704,  FEATURE_NONE},
84    {{"bonaire"},   {"gfx704"},  GK_GFX704,  FEATURE_NONE},
85    {{"gfx705"},    {"gfx705"},  GK_GFX705,  FEATURE_NONE},
86    {{"gfx801"},    {"gfx801"},  GK_GFX801,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
87    {{"carrizo"},   {"gfx801"},  GK_GFX801,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
88    {{"gfx802"},    {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
89    {{"iceland"},   {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
90    {{"tonga"},     {"gfx802"},  GK_GFX802,  FEATURE_FAST_DENORMAL_F32},
91    {{"gfx803"},    {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
92    {{"fiji"},      {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
93    {{"polaris10"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
94    {{"polaris11"}, {"gfx803"},  GK_GFX803,  FEATURE_FAST_DENORMAL_F32},
95    {{"gfx805"},    {"gfx805"},  GK_GFX805,  FEATURE_FAST_DENORMAL_F32},
96    {{"tongapro"},  {"gfx805"},  GK_GFX805,  FEATURE_FAST_DENORMAL_F32},
97    {{"gfx810"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
98    {{"stoney"},    {"gfx810"},  GK_GFX810,  FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
99    {{"gfx900"},    {"gfx900"},  GK_GFX900,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
100    {{"gfx902"},    {"gfx902"},  GK_GFX902,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
101    {{"gfx904"},    {"gfx904"},  GK_GFX904,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
102    {{"gfx906"},    {"gfx906"},  GK_GFX906,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
103    {{"gfx908"},    {"gfx908"},  GK_GFX908,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
104    {{"gfx909"},    {"gfx909"},  GK_GFX909,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
105    {{"gfx90a"},    {"gfx90a"},  GK_GFX90A,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
106    {{"gfx90c"},    {"gfx90c"},  GK_GFX90C,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
107    {{"gfx940"},    {"gfx940"},  GK_GFX940,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
108    {{"gfx941"},    {"gfx941"},  GK_GFX941,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
109    {{"gfx942"},    {"gfx942"},  GK_GFX942,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
110    {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
111    {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
112    {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
113    {{"gfx1013"},   {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
114    {{"gfx1030"},   {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
115    {{"gfx1031"},   {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
116    {{"gfx1032"},   {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
117    {{"gfx1033"},   {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
118    {{"gfx1034"},   {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
119    {{"gfx1035"},   {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
120    {{"gfx1036"},   {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
121    {{"gfx1100"},   {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
122    {{"gfx1101"},   {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
123    {{"gfx1102"},   {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
124    {{"gfx1103"},   {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
125    {{"gfx1150"},   {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
126    {{"gfx1151"},   {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
127    {{"gfx1200"},   {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
128    {{"gfx1201"},   {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
129    // clang-format on
130};
131
132const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
133  GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
134
135  auto I =
136      llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) {
137        return A.Kind < B.Kind;
138      });
139
140  if (I == Table.end() || I->Kind != Search.Kind)
141    return nullptr;
142  return I;
143}
144
145} // namespace
146
147StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
148  if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
149    return Entry->CanonicalName;
150  return "";
151}
152
153StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
154  if (const auto *Entry = getArchEntry(AK, R600GPUs))
155    return Entry->CanonicalName;
156  return "";
157}
158
159AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
160  for (const auto &C : AMDGCNGPUs) {
161    if (CPU == C.Name)
162      return C.Kind;
163  }
164
165  return AMDGPU::GPUKind::GK_NONE;
166}
167
168AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
169  for (const auto &C : R600GPUs) {
170    if (CPU == C.Name)
171      return C.Kind;
172  }
173
174  return AMDGPU::GPUKind::GK_NONE;
175}
176
177unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
178  if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
179    return Entry->Features;
180  return FEATURE_NONE;
181}
182
183unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
184  if (const auto *Entry = getArchEntry(AK, R600GPUs))
185    return Entry->Features;
186  return FEATURE_NONE;
187}
188
189void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
190  // XXX: Should this only report unique canonical names?
191  for (const auto &C : AMDGCNGPUs)
192    Values.push_back(C.Name);
193}
194
195void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
196  for (const auto &C : R600GPUs)
197    Values.push_back(C.Name);
198}
199
200AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
201  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
202  if (AK == AMDGPU::GPUKind::GK_NONE) {
203    if (GPU == "generic-hsa")
204      return {7, 0, 0};
205    if (GPU == "generic")
206      return {6, 0, 0};
207    return {0, 0, 0};
208  }
209
210  // clang-format off
211  switch (AK) {
212  case GK_GFX600:  return {6, 0, 0};
213  case GK_GFX601:  return {6, 0, 1};
214  case GK_GFX602:  return {6, 0, 2};
215  case GK_GFX700:  return {7, 0, 0};
216  case GK_GFX701:  return {7, 0, 1};
217  case GK_GFX702:  return {7, 0, 2};
218  case GK_GFX703:  return {7, 0, 3};
219  case GK_GFX704:  return {7, 0, 4};
220  case GK_GFX705:  return {7, 0, 5};
221  case GK_GFX801:  return {8, 0, 1};
222  case GK_GFX802:  return {8, 0, 2};
223  case GK_GFX803:  return {8, 0, 3};
224  case GK_GFX805:  return {8, 0, 5};
225  case GK_GFX810:  return {8, 1, 0};
226  case GK_GFX900:  return {9, 0, 0};
227  case GK_GFX902:  return {9, 0, 2};
228  case GK_GFX904:  return {9, 0, 4};
229  case GK_GFX906:  return {9, 0, 6};
230  case GK_GFX908:  return {9, 0, 8};
231  case GK_GFX909:  return {9, 0, 9};
232  case GK_GFX90A:  return {9, 0, 10};
233  case GK_GFX90C:  return {9, 0, 12};
234  case GK_GFX940:  return {9, 4, 0};
235  case GK_GFX941:  return {9, 4, 1};
236  case GK_GFX942:  return {9, 4, 2};
237  case GK_GFX1010: return {10, 1, 0};
238  case GK_GFX1011: return {10, 1, 1};
239  case GK_GFX1012: return {10, 1, 2};
240  case GK_GFX1013: return {10, 1, 3};
241  case GK_GFX1030: return {10, 3, 0};
242  case GK_GFX1031: return {10, 3, 1};
243  case GK_GFX1032: return {10, 3, 2};
244  case GK_GFX1033: return {10, 3, 3};
245  case GK_GFX1034: return {10, 3, 4};
246  case GK_GFX1035: return {10, 3, 5};
247  case GK_GFX1036: return {10, 3, 6};
248  case GK_GFX1100: return {11, 0, 0};
249  case GK_GFX1101: return {11, 0, 1};
250  case GK_GFX1102: return {11, 0, 2};
251  case GK_GFX1103: return {11, 0, 3};
252  case GK_GFX1150: return {11, 5, 0};
253  case GK_GFX1151: return {11, 5, 1};
254  case GK_GFX1200: return {12, 0, 0};
255  case GK_GFX1201: return {12, 0, 1};
256  default:         return {0, 0, 0};
257  }
258  // clang-format on
259}
260
261StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
262  assert(T.isAMDGPU());
263  auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch);
264  if (ProcKind == GK_NONE)
265    return StringRef();
266
267  return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
268}
269
270void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
271                                  StringMap<bool> &Features) {
272  // XXX - What does the member GPU mean if device name string passed here?
273  if (T.isAMDGCN()) {
274    switch (parseArchAMDGCN(GPU)) {
275    case GK_GFX1201:
276    case GK_GFX1200:
277      Features["ci-insts"] = true;
278      Features["dot7-insts"] = true;
279      Features["dot8-insts"] = true;
280      Features["dot9-insts"] = true;
281      Features["dot10-insts"] = true;
282      Features["dl-insts"] = true;
283      Features["atomic-ds-pk-add-16-insts"] = true;
284      Features["atomic-flat-pk-add-16-insts"] = true;
285      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
286      Features["atomic-global-pk-add-bf16-inst"] = true;
287      Features["16-bit-insts"] = true;
288      Features["dpp"] = true;
289      Features["gfx8-insts"] = true;
290      Features["gfx9-insts"] = true;
291      Features["gfx10-insts"] = true;
292      Features["gfx10-3-insts"] = true;
293      Features["gfx11-insts"] = true;
294      Features["gfx12-insts"] = true;
295      Features["atomic-fadd-rtn-insts"] = true;
296      Features["image-insts"] = true;
297      Features["fp8-conversion-insts"] = true;
298      break;
299    case GK_GFX1151:
300    case GK_GFX1150:
301    case GK_GFX1103:
302    case GK_GFX1102:
303    case GK_GFX1101:
304    case GK_GFX1100:
305      Features["ci-insts"] = true;
306      Features["dot5-insts"] = true;
307      Features["dot7-insts"] = true;
308      Features["dot8-insts"] = true;
309      Features["dot9-insts"] = true;
310      Features["dot10-insts"] = true;
311      Features["dl-insts"] = true;
312      Features["16-bit-insts"] = true;
313      Features["dpp"] = true;
314      Features["gfx8-insts"] = true;
315      Features["gfx9-insts"] = true;
316      Features["gfx10-insts"] = true;
317      Features["gfx10-3-insts"] = true;
318      Features["gfx11-insts"] = true;
319      Features["atomic-fadd-rtn-insts"] = true;
320      Features["image-insts"] = true;
321      Features["gws"] = true;
322      break;
323    case GK_GFX1036:
324    case GK_GFX1035:
325    case GK_GFX1034:
326    case GK_GFX1033:
327    case GK_GFX1032:
328    case GK_GFX1031:
329    case GK_GFX1030:
330      Features["ci-insts"] = true;
331      Features["dot1-insts"] = true;
332      Features["dot2-insts"] = true;
333      Features["dot5-insts"] = true;
334      Features["dot6-insts"] = true;
335      Features["dot7-insts"] = true;
336      Features["dot10-insts"] = true;
337      Features["dl-insts"] = true;
338      Features["16-bit-insts"] = true;
339      Features["dpp"] = true;
340      Features["gfx8-insts"] = true;
341      Features["gfx9-insts"] = true;
342      Features["gfx10-insts"] = true;
343      Features["gfx10-3-insts"] = true;
344      Features["image-insts"] = true;
345      Features["s-memrealtime"] = true;
346      Features["s-memtime-inst"] = true;
347      Features["gws"] = true;
348      break;
349    case GK_GFX1012:
350    case GK_GFX1011:
351      Features["dot1-insts"] = true;
352      Features["dot2-insts"] = true;
353      Features["dot5-insts"] = true;
354      Features["dot6-insts"] = true;
355      Features["dot7-insts"] = true;
356      Features["dot10-insts"] = true;
357      [[fallthrough]];
358    case GK_GFX1013:
359    case GK_GFX1010:
360      Features["dl-insts"] = true;
361      Features["ci-insts"] = true;
362      Features["16-bit-insts"] = true;
363      Features["dpp"] = true;
364      Features["gfx8-insts"] = true;
365      Features["gfx9-insts"] = true;
366      Features["gfx10-insts"] = true;
367      Features["image-insts"] = true;
368      Features["s-memrealtime"] = true;
369      Features["s-memtime-inst"] = true;
370      Features["gws"] = true;
371      break;
372    case GK_GFX942:
373    case GK_GFX941:
374    case GK_GFX940:
375      Features["gfx940-insts"] = true;
376      Features["fp8-insts"] = true;
377      Features["fp8-conversion-insts"] = true;
378      Features["atomic-ds-pk-add-16-insts"] = true;
379      Features["atomic-flat-pk-add-16-insts"] = true;
380      Features["atomic-global-pk-add-bf16-inst"] = true;
381      Features["gfx90a-insts"] = true;
382      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
383      Features["atomic-fadd-rtn-insts"] = true;
384      Features["dot3-insts"] = true;
385      Features["dot4-insts"] = true;
386      Features["dot5-insts"] = true;
387      Features["dot6-insts"] = true;
388      Features["mai-insts"] = true;
389      Features["dl-insts"] = true;
390      Features["dot1-insts"] = true;
391      Features["dot2-insts"] = true;
392      Features["dot7-insts"] = true;
393      Features["dot10-insts"] = true;
394      Features["gfx9-insts"] = true;
395      Features["gfx8-insts"] = true;
396      Features["16-bit-insts"] = true;
397      Features["dpp"] = true;
398      Features["s-memrealtime"] = true;
399      Features["ci-insts"] = true;
400      Features["s-memtime-inst"] = true;
401      Features["gws"] = true;
402      break;
403    case GK_GFX90A:
404      Features["gfx90a-insts"] = true;
405      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
406      Features["atomic-fadd-rtn-insts"] = true;
407      [[fallthrough]];
408    case GK_GFX908:
409      Features["dot3-insts"] = true;
410      Features["dot4-insts"] = true;
411      Features["dot5-insts"] = true;
412      Features["dot6-insts"] = true;
413      Features["mai-insts"] = true;
414      [[fallthrough]];
415    case GK_GFX906:
416      Features["dl-insts"] = true;
417      Features["dot1-insts"] = true;
418      Features["dot2-insts"] = true;
419      Features["dot7-insts"] = true;
420      Features["dot10-insts"] = true;
421      [[fallthrough]];
422    case GK_GFX90C:
423    case GK_GFX909:
424    case GK_GFX904:
425    case GK_GFX902:
426    case GK_GFX900:
427      Features["gfx9-insts"] = true;
428      [[fallthrough]];
429    case GK_GFX810:
430    case GK_GFX805:
431    case GK_GFX803:
432    case GK_GFX802:
433    case GK_GFX801:
434      Features["gfx8-insts"] = true;
435      Features["16-bit-insts"] = true;
436      Features["dpp"] = true;
437      Features["s-memrealtime"] = true;
438      [[fallthrough]];
439    case GK_GFX705:
440    case GK_GFX704:
441    case GK_GFX703:
442    case GK_GFX702:
443    case GK_GFX701:
444    case GK_GFX700:
445      Features["ci-insts"] = true;
446      [[fallthrough]];
447    case GK_GFX602:
448    case GK_GFX601:
449    case GK_GFX600:
450      Features["image-insts"] = true;
451      Features["s-memtime-inst"] = true;
452      Features["gws"] = true;
453      break;
454    case GK_NONE:
455      break;
456    default:
457      llvm_unreachable("Unhandled GPU!");
458    }
459  } else {
460    if (GPU.empty())
461      GPU = "r600";
462
463    switch (llvm::AMDGPU::parseArchR600(GPU)) {
464    case GK_CAYMAN:
465    case GK_CYPRESS:
466    case GK_RV770:
467    case GK_RV670:
468      // TODO: Add fp64 when implemented.
469      break;
470    case GK_TURKS:
471    case GK_CAICOS:
472    case GK_BARTS:
473    case GK_SUMO:
474    case GK_REDWOOD:
475    case GK_JUNIPER:
476    case GK_CEDAR:
477    case GK_RV730:
478    case GK_RV710:
479    case GK_RS880:
480    case GK_R630:
481    case GK_R600:
482      break;
483    default:
484      llvm_unreachable("Unhandled GPU!");
485    }
486  }
487}
488
489static bool isWave32Capable(StringRef GPU, const Triple &T) {
490  bool IsWave32Capable = false;
491  // XXX - What does the member GPU mean if device name string passed here?
492  if (T.isAMDGCN()) {
493    switch (parseArchAMDGCN(GPU)) {
494    case GK_GFX1201:
495    case GK_GFX1200:
496    case GK_GFX1151:
497    case GK_GFX1150:
498    case GK_GFX1103:
499    case GK_GFX1102:
500    case GK_GFX1101:
501    case GK_GFX1100:
502    case GK_GFX1036:
503    case GK_GFX1035:
504    case GK_GFX1034:
505    case GK_GFX1033:
506    case GK_GFX1032:
507    case GK_GFX1031:
508    case GK_GFX1030:
509    case GK_GFX1012:
510    case GK_GFX1011:
511    case GK_GFX1013:
512    case GK_GFX1010:
513      IsWave32Capable = true;
514      break;
515    default:
516      break;
517    }
518  }
519  return IsWave32Capable;
520}
521
522bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
523                                   StringMap<bool> &Features,
524                                   std::string &ErrorMsg) {
525  bool IsWave32Capable = isWave32Capable(GPU, T);
526  const bool IsNullGPU = GPU.empty();
527  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
528  const bool HaveWave32 =
529      (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32");
530  const bool HaveWave64 = Features.count("wavefrontsize64");
531  if (HaveWave32 && HaveWave64) {
532    ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
533    return false;
534  }
535  // Don't assume any wavesize with an unknown subtarget.
536  if (!IsNullGPU) {
537    // Default to wave32 if available, or wave64 if not
538    if (!HaveWave32 && !HaveWave64) {
539      StringRef DefaultWaveSizeFeature =
540          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
541      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
542    }
543  }
544  return true;
545}
546